SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
Gaussian.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Alesis Novik
8  * Written (W) 2014 Parijat Mazumdar
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #ifdef HAVE_LAPACK
14 
17 #include <shogun/base/Parameter.h>
18 
19 using namespace shogun;
20 
21 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
22 {
23  register_params();
24 }
25 
27  : CDistribution()
28 {
29  ASSERT(mean.vlen==cov.num_rows)
30  ASSERT(cov.num_rows==cov.num_cols)
33  m_cov_type=cov_type;
34 
35  m_mean=mean;
36 
37  if (cov.num_rows==1)
38  m_cov_type=SPHERICAL;
39 
40  decompose_cov(cov);
41  init();
42  register_params();
43 }
44 
46 {
48  switch (m_cov_type)
49  {
50  case FULL:
51  case DIAG:
52  for (int32_t i=0; i<m_d.vlen; i++)
54  break;
55  case SPHERICAL:
57  break;
58  }
59 }
60 
62 {
63 }
64 
66 {
67  // init features with data if necessary and assure type is correct
68  if (data)
69  {
70  if (!data->has_property(FP_DOT))
71  SG_ERROR("Specified features are not of type CDotFeatures\n")
72  set_features(data);
73  }
74 
75  CDotFeatures* dotdata=(CDotFeatures *) data;
76  m_mean=dotdata->get_mean();
77  SGMatrix<float64_t> cov=dotdata->get_cov();
78  decompose_cov(cov);
79  init();
80  return true;
81 }
82 
84 {
85  switch (m_cov_type)
86  {
87  case FULL:
89  case DIAG:
90  return m_d.vlen+m_mean.vlen;
91  case SPHERICAL:
92  return 1+m_mean.vlen;
93  }
94  return 0;
95 }
96 
98 {
100  return 0;
101 }
102 
103 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
104 {
106  return 0;
107 }
108 
110 {
112  SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
113  float64_t answer=compute_log_PDF(v);
114  return answer;
115 }
116 
118 {
119  CDotFeatures* dotdata=dynamic_cast<CDotFeatures *>(features);
120  REQUIRE(dotdata,"dynamic cast from CFeatures to CDotFeatures returned NULL\n")
121  int32_t num_dim=dotdata->get_dim_feature_space();
122 
123  // compute mean
124 
125  float64_t alpha_k_sum=0;
126  SGVector<float64_t> mean(num_dim);
127  mean.fill_vector(mean.vector,mean.vlen,0);
128  for (int32_t i=0;i<len;i++)
129  {
130  alpha_k_sum+=alpha_k[i];
132  SGVector<float64_t>::add(mean.vector, alpha_k[i], v.vector, 1, mean.vector, v.vlen);
133  }
134 
135  for (int32_t i=0; i<num_dim; i++)
136  mean[i]/=alpha_k_sum;
137 
138  set_mean(mean);
139 
140  // compute covariance matrix
141 
142  float64_t* cov_sum=NULL;
143  ECovType cov_type=get_cov_type();
144  if (cov_type==FULL)
145  {
146  cov_sum=SG_MALLOC(float64_t, num_dim*num_dim);
147  memset(cov_sum, 0, num_dim*num_dim*sizeof(float64_t));
148  }
149  else if(cov_type==DIAG)
150  {
151  cov_sum=SG_MALLOC(float64_t,num_dim);
152  memset(cov_sum, 0, num_dim*sizeof(float64_t));
153  }
154  else if(cov_type==SPHERICAL)
155  {
156  cov_sum=SG_MALLOC(float64_t,1);
157  cov_sum[0]=0;
158  }
159 
160  for (int32_t j=0; j<len; j++)
161  {
163  SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
164 
165  switch (cov_type)
166  {
167  case FULL:
168  cblas_dger(CblasRowMajor, num_dim, num_dim, alpha_k[j], v.vector, 1, v.vector,
169  1, (double*) cov_sum, num_dim);
170 
171  break;
172  case DIAG:
173  for (int32_t k=0; k<num_dim; k++)
174  cov_sum[k]+=v.vector[k]*v.vector[k]*alpha_k[j];
175 
176  break;
177  case SPHERICAL:
178  float64_t temp=0;
179 
180  for (int32_t k=0; k<num_dim; k++)
181  temp+=v.vector[k]*v.vector[k];
182 
183  cov_sum[0]+=temp*alpha_k[j];
184  break;
185  }
186  }
187 
188  switch (cov_type)
189  {
190  case FULL:
191  for (int32_t j=0; j<num_dim*num_dim; j++)
192  cov_sum[j]/=alpha_k_sum;
193 
194  float64_t* d0;
195  d0=SGMatrix<float64_t>::compute_eigenvectors(cov_sum, num_dim, num_dim);
196 
197  set_d(SGVector<float64_t>(d0, num_dim));
198  set_u(SGMatrix<float64_t>(cov_sum, num_dim, num_dim));
199 
200  break;
201 
202  case DIAG:
203  for (int32_t j=0; j<num_dim; j++)
204  cov_sum[j]/=alpha_k_sum;
205 
206  set_d(SGVector<float64_t>(cov_sum,num_dim));
207 
208  break;
209 
210  case SPHERICAL:
211  cov_sum[0]/=alpha_k_sum*num_dim;
212 
213  set_d(SGVector<float64_t>(cov_sum,1));
214 
215  break;
216  }
217 
218  return alpha_k_sum;
219 }
220 
222 {
224  ASSERT(point.vlen == m_mean.vlen)
225  float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
226  memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
227 
228  for (int32_t i = 0; i < m_mean.vlen; i++)
229  difference[i] -= m_mean.vector[i];
230 
231  float64_t answer=m_constant;
232 
233  if (m_cov_type==FULL)
234  {
235  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
236  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
237  1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
238 
239  for (int32_t i=0; i<m_d.vlen; i++)
240  answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
241 
242  SG_FREE(temp_holder);
243  }
244  else if (m_cov_type==DIAG)
245  {
246  for (int32_t i=0; i<m_mean.vlen; i++)
247  answer+=difference[i]*difference[i]/m_d.vector[i];
248  }
249  else
250  {
251  for (int32_t i=0; i<m_mean.vlen; i++)
252  answer+=difference[i]*difference[i]/m_d.vector[0];
253  }
254 
255  SG_FREE(difference);
256 
257  return -0.5*answer;
258 }
259 
261 {
262  return m_mean;
263 }
264 
266 {
267  if (mean.vlen==1)
269 
270  m_mean=mean;
271 }
272 
274 {
275  ASSERT(cov.num_rows==cov.num_cols)
276  ASSERT(cov.num_rows==m_mean.vlen)
277  decompose_cov(cov);
278  init();
279 }
280 
282 {
283  m_d = d;
284  init();
285 }
286 
288 {
289  float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
290  memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
291 
292  if (m_cov_type==FULL)
293  {
294  if (!m_u.matrix)
295  SG_ERROR("Unitary matrix not set\n")
296 
297  float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
298  float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
299  memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
300  for(int32_t i=0; i<m_d.vlen; i++)
301  diag_holder[i*m_d.vlen+i]=m_d.vector[i];
302 
303  cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
305  diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
306  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
307  m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
308  m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
309 
310  SG_FREE(diag_holder);
311  SG_FREE(temp_holder);
312  }
313  else if (m_cov_type==DIAG)
314  {
315  for (int32_t i=0; i<m_d.vlen; i++)
316  cov[i*m_d.vlen+i]=m_d.vector[i];
317  }
318  else
319  {
320  for (int32_t i=0; i<m_mean.vlen; i++)
321  cov[i*m_mean.vlen+i]=m_d.vector[0];
322  }
323  return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed
324 }
325 
326 void CGaussian::register_params()
327 {
328  SG_ADD(&m_u, "m_u", "Unitary matrix.",MS_NOT_AVAILABLE);
329  SG_ADD(&m_d, "m_d", "Diagonal.",MS_NOT_AVAILABLE);
330  SG_ADD(&m_mean, "m_mean", "Mean.",MS_NOT_AVAILABLE);
331  SG_ADD(&m_constant, "m_constant", "Constant part.",MS_NOT_AVAILABLE);
332  SG_ADD((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.",MS_NOT_AVAILABLE);
333 }
334 
335 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
336 {
337  switch (m_cov_type)
338  {
339  case FULL:
341  memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
342 
344  m_d.vlen=cov.num_rows;
345  m_u.num_rows=cov.num_rows;
346  m_u.num_cols=cov.num_rows;
347  break;
348  case DIAG:
350  for (int32_t i=0; i<cov.num_rows; i++)
351  m_d[i]=cov.matrix[i*cov.num_rows+i];
352 
353  break;
354  case SPHERICAL:
356  m_d.vector[0]=cov.matrix[0];
357  break;
358  }
359 }
360 
362 {
363  SG_DEBUG("Entering\n");
364  float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
365  memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
366 
367  switch (m_cov_type)
368  {
369  case FULL:
370  case DIAG:
371  for (int32_t i=0; i<m_mean.vlen; i++)
372  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
373 
374  break;
375  case SPHERICAL:
376  for (int32_t i=0; i<m_mean.vlen; i++)
377  r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
378 
379  break;
380  }
381 
382  float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
383 
384  for (int32_t i=0; i<m_mean.vlen; i++)
385  random_vec[i]=CMath::randn_double();
386 
387  if (m_cov_type==FULL)
388  {
389  float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
390  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
392  r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
393  SG_FREE(r_matrix);
394  r_matrix=temp_matrix;
395  }
396 
397  float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
398 
399  cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
400  1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
401 
402  for (int32_t i=0; i<m_mean.vlen; i++)
403  samp[i]+=m_mean.vector[i];
404 
405  SG_FREE(random_vec);
406  SG_FREE(r_matrix);
407 
408  SG_DEBUG("Leaving\n");
409  return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed
410 }
411 
413 {
414  if (!distribution)
415  return NULL;
416 
417  CGaussian* casted=dynamic_cast<CGaussian*>(distribution);
418  if (!casted)
419  return NULL;
420 
421  /* since an additional reference is returned */
422  SG_REF(casted);
423  return casted;
424 }
425 
426 #endif // HAVE_LAPACK
SGVector< float64_t > sample()
Definition: Gaussian.cpp:361
float64_t m_constant
Definition: Gaussian.h:238
void set_u(SGMatrix< float64_t > u)
Definition: Gaussian.h:206
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:223
virtual void set_features(CFeatures *f)
Definition: Distribution.h:160
Gaussian distribution interface.
Definition: Gaussian.h:50
ECovType get_cov_type()
Definition: Gaussian.h:162
virtual bool train(CFeatures *data=NULL)
Definition: Gaussian.cpp:65
static float64_t randn_double()
Definition: Math.h:1132
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
index_t num_cols
Definition: SGMatrix.h:378
virtual float64_t compute_log_PDF(SGVector< float64_t > point)
Definition: Gaussian.cpp:221
Base class Distribution from which all methods implementing a distribution are derived.
Definition: Distribution.h:44
ECovType m_cov_type
Definition: Gaussian.h:246
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
full covariance
Definition: Gaussian.h:36
virtual int32_t get_dim_feature_space() const =0
spherical covariance
Definition: Gaussian.h:40
virtual float64_t update_params_em(float64_t *alpha_k, int32_t len)
Definition: Gaussian.cpp:117
virtual SGVector< float64_t > get_mean()
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_u
Definition: Gaussian.h:242
#define ASSERT(x)
Definition: SGIO.h:201
virtual SGVector< float64_t > get_mean()
Definition: Gaussian.cpp:260
virtual float64_t get_log_model_parameter(int32_t num_param)
Definition: Gaussian.cpp:97
static CGaussian * obtain_from_generic(CDistribution *distribution)
Definition: Gaussian.cpp:412
virtual void set_cov(SGMatrix< float64_t > cov)
Definition: Gaussian.cpp:273
double float64_t
Definition: common.h:50
ECovType
Definition: Gaussian.h:33
SGVector< float64_t > m_mean
Definition: Gaussian.h:244
virtual SGMatrix< float64_t > get_cov()
Definition: Gaussian.cpp:287
#define M_PI
workaround for log2 being a define on cygwin
Definition: Math.h:59
virtual ~CGaussian()
Definition: Gaussian.cpp:61
diagonal covariance
Definition: Gaussian.h:38
virtual float64_t get_log_likelihood_example(int32_t num_example)
Definition: Gaussian.cpp:109
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
int machine_int_t
Definition: common.h:59
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t log(float64_t v)
Definition: Math.h:922
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual void set_mean(const SGVector< float64_t > mean)
Definition: Gaussian.cpp:265
#define SG_ADD(...)
Definition: SGObject.h:81
static float32_t sqrt(float32_t x)
Definition: Math.h:459
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual int32_t get_num_model_parameters()
Definition: Gaussian.cpp:83
static SGVector< float64_t > compute_eigenvectors(SGMatrix< float64_t > matrix)
Definition: SGMatrix.cpp:895
void add(const SGVector< T > x)
Definition: SGVector.cpp:281
virtual SGMatrix< float64_t > get_cov()
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
Definition: Gaussian.cpp:103
void set_d(const SGVector< float64_t > d)
Definition: Gaussian.cpp:281
SGVector< float64_t > m_d
Definition: Gaussian.h:240

SHOGUN 机器学习工具包 - 项目文档