SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Gaussian.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Alesis Novik
8  * Written (W) 2014 Parijat Mazumdar
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #ifdef HAVE_LAPACK
14 
17 #include <shogun/base/Parameter.h>
19 
20 using namespace shogun;
21 
22 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
23 {
24  register_params();
25 }
26 
28  : CDistribution()
29 {
30  ASSERT(mean.vlen==cov.num_rows)
31  ASSERT(cov.num_rows==cov.num_cols)
34  m_cov_type=cov_type;
35 
36  m_mean=mean;
37 
38  if (cov.num_rows==1)
39  m_cov_type=SPHERICAL;
40 
41  decompose_cov(cov);
42  init();
43  register_params();
44 }
45 
47 {
49  switch (m_cov_type)
50  {
51  case FULL:
52  case DIAG:
53  for (int32_t i=0; i<m_d.vlen; i++)
55  break;
56  case SPHERICAL:
58  break;
59  }
60 }
61 
63 {
64 }
65 
67 {
68  // init features with data if necessary and assure type is correct
69  if (data)
70  {
71  if (!data->has_property(FP_DOT))
72  SG_ERROR("Specified features are not of type CDotFeatures\n")
73  set_features(data);
74  }
75 
76  CDotFeatures* dotdata=(CDotFeatures *) data;
77  m_mean=dotdata->get_mean();
78  SGMatrix<float64_t> cov=dotdata->get_cov();
79  decompose_cov(cov);
80  init();
81  return true;
82 }
83 
85 {
86  switch (m_cov_type)
87  {
88  case FULL:
90  case DIAG:
91  return m_d.vlen+m_mean.vlen;
92  case SPHERICAL:
93  return 1+m_mean.vlen;
94  }
95  return 0;
96 }
97 
99 {
101  return 0;
102 }
103 
104 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
105 {
107  return 0;
108 }
109 
111 {
113  SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
114  float64_t answer=compute_log_PDF(v);
115  return answer;
116 }
117 
119 {
120  CDotFeatures* dotdata=dynamic_cast<CDotFeatures *>(features);
121  REQUIRE(dotdata,"dynamic cast from CFeatures to CDotFeatures returned NULL\n")
122  int32_t num_dim=dotdata->get_dim_feature_space();
123 
124  // compute mean
125 
126  float64_t alpha_k_sum=0;
127  SGVector<float64_t> mean(num_dim);
128  mean.fill_vector(mean.vector,mean.vlen,0);
129  for (int32_t i=0;i<len;i++)
130  {
131  alpha_k_sum+=alpha_k[i];
133  SGVector<float64_t>::add(mean.vector, alpha_k[i], v.vector, 1, mean.vector, v.vlen);
134  }
135 
136  for (int32_t i=0; i<num_dim; i++)
137  mean[i]/=alpha_k_sum;
138 
139  set_mean(mean);
140 
141  // compute covariance matrix
142 
143  float64_t* cov_sum=NULL;
144  ECovType cov_type=get_cov_type();
145  if (cov_type==FULL)
146  {
147  cov_sum=SG_MALLOC(float64_t, num_dim*num_dim);
148  memset(cov_sum, 0, num_dim*num_dim*sizeof(float64_t));
149  }
150  else if(cov_type==DIAG)
151  {
152  cov_sum=SG_MALLOC(float64_t,num_dim);
153  memset(cov_sum, 0, num_dim*sizeof(float64_t));
154  }
155  else if(cov_type==SPHERICAL)
156  {
157  cov_sum=SG_MALLOC(float64_t,1);
158  cov_sum[0]=0;
159  }
160 
161  for (int32_t j=0; j<len; j++)
162  {
164  SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
165 
166  switch (cov_type)
167  {
168  case FULL:
169  cblas_dger(CblasRowMajor, num_dim, num_dim, alpha_k[j], v.vector, 1, v.vector,
170  1, (double*) cov_sum, num_dim);
171 
172  break;
173  case DIAG:
174  for (int32_t k=0; k<num_dim; k++)
175  cov_sum[k]+=v.vector[k]*v.vector[k]*alpha_k[j];
176 
177  break;
178  case SPHERICAL:
179  float64_t temp=0;
180 
181  for (int32_t k=0; k<num_dim; k++)
182  temp+=v.vector[k]*v.vector[k];
183 
184  cov_sum[0]+=temp*alpha_k[j];
185  break;
186  }
187  }
188 
189  switch (cov_type)
190  {
191  case FULL:
192  for (int32_t j=0; j<num_dim*num_dim; j++)
193  cov_sum[j]/=alpha_k_sum;
194 
195  float64_t* d0;
196  d0=SGMatrix<float64_t>::compute_eigenvectors(cov_sum, num_dim, num_dim);
197 
198  set_d(SGVector<float64_t>(d0, num_dim));
199  set_u(SGMatrix<float64_t>(cov_sum, num_dim, num_dim));
200 
201  break;
202 
203  case DIAG:
204  for (int32_t j=0; j<num_dim; j++)
205  cov_sum[j]/=alpha_k_sum;
206 
207  set_d(SGVector<float64_t>(cov_sum,num_dim));
208 
209  break;
210 
211  case SPHERICAL:
212  cov_sum[0]/=alpha_k_sum*num_dim;
213 
214  set_d(SGVector<float64_t>(cov_sum,1));
215 
216  break;
217  }
218 
219  return alpha_k_sum;
220 }
221 
223 {
225  ASSERT(point.vlen == m_mean.vlen)
226  float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
227  memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
228 
229  for (int32_t i = 0; i < m_mean.vlen; i++)
230  difference[i] -= m_mean.vector[i];
231 
232  float64_t answer=m_constant;
233 
234  if (m_cov_type==FULL)
235  {
236  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
237  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
238  1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
239 
240  for (int32_t i=0; i<m_d.vlen; i++)
241  answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
242 
243  SG_FREE(temp_holder);
244  }
245  else if (m_cov_type==DIAG)
246  {
247  for (int32_t i=0; i<m_mean.vlen; i++)
248  answer+=difference[i]*difference[i]/m_d.vector[i];
249  }
250  else
251  {
252  for (int32_t i=0; i<m_mean.vlen; i++)
253  answer+=difference[i]*difference[i]/m_d.vector[0];
254  }
255 
256  SG_FREE(difference);
257 
258  return -0.5*answer;
259 }
260 
262 {
263  return m_mean;
264 }
265 
267 {
268  if (mean.vlen==1)
270 
271  m_mean=mean;
272 }
273 
275 {
276  ASSERT(cov.num_rows==cov.num_cols)
277  ASSERT(cov.num_rows==m_mean.vlen)
278  decompose_cov(cov);
279  init();
280 }
281 
283 {
284  m_d = d;
285  init();
286 }
287 
289 {
290  float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
291  memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
292 
293  if (m_cov_type==FULL)
294  {
295  if (!m_u.matrix)
296  SG_ERROR("Unitary matrix not set\n")
297 
298  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
299  float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
300  memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
301  for(int32_t i=0; i<m_d.vlen; i++)
302  diag_holder[i*m_d.vlen+i]=m_d.vector[i];
303 
304  cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
306  diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
307  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
308  m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
309  m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
310 
311  SG_FREE(diag_holder);
312  SG_FREE(temp_holder);
313  }
314  else if (m_cov_type==DIAG)
315  {
316  for (int32_t i=0; i<m_d.vlen; i++)
317  cov[i*m_d.vlen+i]=m_d.vector[i];
318  }
319  else
320  {
321  for (int32_t i=0; i<m_mean.vlen; i++)
322  cov[i*m_mean.vlen+i]=m_d.vector[0];
323  }
324  return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed
325 }
326 
327 void CGaussian::register_params()
328 {
329  SG_ADD(&m_u, "m_u", "Unitary matrix.",MS_NOT_AVAILABLE);
330  SG_ADD(&m_d, "m_d", "Diagonal.",MS_NOT_AVAILABLE);
331  SG_ADD(&m_mean, "m_mean", "Mean.",MS_NOT_AVAILABLE);
332  SG_ADD(&m_constant, "m_constant", "Constant part.",MS_NOT_AVAILABLE);
333  SG_ADD((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.",MS_NOT_AVAILABLE);
334 }
335 
336 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
337 {
338  switch (m_cov_type)
339  {
340  case FULL:
342  memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
343 
345  m_d.vlen=cov.num_rows;
346  m_u.num_rows=cov.num_rows;
347  m_u.num_cols=cov.num_rows;
348  break;
349  case DIAG:
351  for (int32_t i=0; i<cov.num_rows; i++)
352  m_d[i]=cov.matrix[i*cov.num_rows+i];
353 
354  break;
355  case SPHERICAL:
357  m_d.vector[0]=cov.matrix[0];
358  break;
359  }
360 }
361 
363 {
364  SG_DEBUG("Entering\n");
365  float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
366  memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
367 
368  switch (m_cov_type)
369  {
370  case FULL:
371  case DIAG:
372  for (int32_t i=0; i<m_mean.vlen; i++)
373  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
374 
375  break;
376  case SPHERICAL:
377  for (int32_t i=0; i<m_mean.vlen; i++)
378  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
379 
380  break;
381  }
382 
383  float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
384 
385  for (int32_t i=0; i<m_mean.vlen; i++)
386  random_vec[i]=CMath::randn_double();
387 
388  if (m_cov_type==FULL)
389  {
390  float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
391  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
393  r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
394  SG_FREE(r_matrix);
395  r_matrix=temp_matrix;
396  }
397 
398  float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
399 
400  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
401  1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
402 
403  for (int32_t i=0; i<m_mean.vlen; i++)
404  samp[i]+=m_mean.vector[i];
405 
406  SG_FREE(random_vec);
407  SG_FREE(r_matrix);
408 
409  SG_DEBUG("Leaving\n");
410  return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed
411 }
412 
414 {
415  if (!distribution)
416  return NULL;
417 
418  CGaussian* casted=dynamic_cast<CGaussian*>(distribution);
419  if (!casted)
420  return NULL;
421 
422  /* since an additional reference is returned */
423  SG_REF(casted);
424  return casted;
425 }
426 
427 #endif // HAVE_LAPACK
SGVector< float64_t > sample()
Definition: Gaussian.cpp:362
float64_t m_constant
Definition: Gaussian.h:237
void set_u(SGMatrix< float64_t > u)
Definition: Gaussian.h:205
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:221
virtual void set_features(CFeatures *f)
Definition: Distribution.h:160
Gaussian distribution interface.
Definition: Gaussian.h:49
ECovType get_cov_type()
Definition: Gaussian.h:161
virtual bool train(CFeatures *data=NULL)
Definition: Gaussian.cpp:66
static float64_t randn_double()
Definition: Math.h:1132
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
index_t num_cols
Definition: SGMatrix.h:376
virtual float64_t compute_log_PDF(SGVector< float64_t > point)
Definition: Gaussian.cpp:222
Base class Distribution from which all methods implementing a distribution are derived.
Definition: Distribution.h:44
ECovType m_cov_type
Definition: Gaussian.h:245
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:54
index_t num_rows
Definition: SGMatrix.h:374
full covariance
Definition: Gaussian.h:35
virtual int32_t get_dim_feature_space() const =0
spherical covariance
Definition: Gaussian.h:39
virtual float64_t update_params_em(float64_t *alpha_k, int32_t len)
Definition: Gaussian.cpp:118
virtual SGVector< float64_t > get_mean()
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_u
Definition: Gaussian.h:241
#define ASSERT(x)
Definition: SGIO.h:201
virtual SGVector< float64_t > get_mean()
Definition: Gaussian.cpp:261
virtual float64_t get_log_model_parameter(int32_t num_param)
Definition: Gaussian.cpp:98
static CGaussian * obtain_from_generic(CDistribution *distribution)
Definition: Gaussian.cpp:413
virtual void set_cov(SGMatrix< float64_t > cov)
Definition: Gaussian.cpp:274
double float64_t
Definition: common.h:50
ECovType
Definition: Gaussian.h:32
SGVector< float64_t > m_mean
Definition: Gaussian.h:243
virtual SGMatrix< float64_t > get_cov()
Definition: Gaussian.cpp:288
#define M_PI
workaround for log2 being a define on cygwin
Definition: Math.h:59
virtual ~CGaussian()
Definition: Gaussian.cpp:62
diagonal covariance
Definition: Gaussian.h:37
virtual float64_t get_log_likelihood_example(int32_t num_example)
Definition: Gaussian.cpp:110
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
int machine_int_t
Definition: common.h:59
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t log(float64_t v)
Definition: Math.h:922
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual void set_mean(const SGVector< float64_t > mean)
Definition: Gaussian.cpp:266
#define SG_ADD(...)
Definition: SGObject.h:84
static float32_t sqrt(float32_t x)
Definition: Math.h:459
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual int32_t get_num_model_parameters()
Definition: Gaussian.cpp:84
static SGVector< float64_t > compute_eigenvectors(SGMatrix< float64_t > matrix)
Definition: SGMatrix.cpp:891
void add(const SGVector< T > x)
Definition: SGVector.cpp:279
virtual SGMatrix< float64_t > get_cov()
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
Definition: Gaussian.cpp:104
void set_d(const SGVector< float64_t > d)
Definition: Gaussian.cpp:282
SGVector< float64_t > m_d
Definition: Gaussian.h:239

SHOGUN Machine Learning Toolbox - Documentation