00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/lib/config.h>
00011
00012 #ifdef HAVE_LAPACK
00013
00014 #include <shogun/distributions/Gaussian.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parameter.h>
00017
00018 using namespace shogun;
00019
00020 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
00021 {
00022 register_params();
00023 }
00024
00025 CGaussian::CGaussian(SGVector<float64_t> mean, SGMatrix<float64_t> cov,
00026 ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type)
00027 {
00028 ASSERT(mean.vlen==cov.num_rows);
00029 ASSERT(cov.num_rows==cov.num_cols);
00030
00031 m_mean=mean;
00032
00033 if (cov.num_rows==1)
00034 m_cov_type=SPHERICAL;
00035
00036 decompose_cov(cov);
00037 init();
00038 register_params();
00039
00040 if (cov.do_free)
00041 cov.free_matrix();
00042 }
00043
00044 void CGaussian::init()
00045 {
00046 m_constant=CMath::log(2*M_PI)*m_mean.vlen;
00047 switch (m_cov_type)
00048 {
00049 case FULL:
00050 case DIAG:
00051 for (int32_t i=0; i<m_d.vlen; i++)
00052 m_constant+=CMath::log(m_d.vector[i]);
00053 break;
00054 case SPHERICAL:
00055 m_constant+=m_mean.vlen*CMath::log(m_d.vector[0]);
00056 break;
00057 }
00058 }
00059
00060 CGaussian::~CGaussian()
00061 {
00062 m_d.destroy_vector();
00063 m_u.destroy_matrix();
00064 m_mean.free_vector();
00065 }
00066
00067 bool CGaussian::train(CFeatures* data)
00068 {
00069
00070 if (data)
00071 {
00072 if (!data->has_property(FP_DOT))
00073 SG_ERROR("Specified features are not of type CDotFeatures\n");
00074 set_features(data);
00075 }
00076 CDotFeatures* dotdata=(CDotFeatures *) data;
00077
00078 m_mean.destroy_vector();
00079
00080 m_mean=dotdata->get_mean();
00081 SGMatrix<float64_t> cov=dotdata->get_cov();
00082
00083 decompose_cov(cov);
00084 cov.destroy_matrix();
00085
00086 init();
00087
00088 return true;
00089 }
00090
00091 int32_t CGaussian::get_num_model_parameters()
00092 {
00093 switch (m_cov_type)
00094 {
00095 case FULL:
00096 return m_u.num_rows*m_u.num_cols+m_d.vlen+m_mean.vlen;
00097 case DIAG:
00098 return m_d.vlen+m_mean.vlen;
00099 case SPHERICAL:
00100 return 1+m_mean.vlen;
00101 }
00102 return 0;
00103 }
00104
00105 float64_t CGaussian::get_log_model_parameter(int32_t num_param)
00106 {
00107 SG_NOTIMPLEMENTED;
00108 return 0;
00109 }
00110
00111 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
00112 {
00113 SG_NOTIMPLEMENTED;
00114 return 0;
00115 }
00116
00117 float64_t CGaussian::get_log_likelihood_example(int32_t num_example)
00118 {
00119 ASSERT(features->has_property(FP_DOT));
00120 SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
00121 float64_t answer=compute_log_PDF(v);
00122 v.free_vector();
00123 return answer;
00124 }
00125
00126 float64_t CGaussian::compute_log_PDF(SGVector<float64_t> point)
00127 {
00128 ASSERT(m_mean.vector && m_d.vector);
00129 ASSERT(point.vlen == m_mean.vlen);
00130 float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
00131 memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
00132
00133 for (int32_t i = 0; i < m_mean.vlen; i++)
00134 difference[i] -= m_mean.vector[i];
00135
00136 float64_t answer=m_constant;
00137
00138 if (m_cov_type==FULL)
00139 {
00140 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
00141 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
00142 1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
00143
00144 for (int32_t i=0; i<m_d.vlen; i++)
00145 answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
00146
00147 SG_FREE(temp_holder);
00148 }
00149 else if (m_cov_type==DIAG)
00150 {
00151 for (int32_t i=0; i<m_mean.vlen; i++)
00152 answer+=difference[i]*difference[i]/m_d.vector[i];
00153 }
00154 else
00155 {
00156 for (int32_t i=0; i<m_mean.vlen; i++)
00157 answer+=difference[i]*difference[i]/m_d.vector[0];
00158 }
00159
00160 SG_FREE(difference);
00161
00162 return -0.5*answer;
00163 }
00164
00165 SGMatrix<float64_t> CGaussian::get_cov()
00166 {
00167 float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00168 memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
00169
00170 if (m_cov_type==FULL)
00171 {
00172 if (!m_u.matrix)
00173 SG_ERROR("Unitary matrix not set\n");
00174
00175 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00176 float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00177 memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
00178 for(int32_t i=0; i<m_d.vlen; i++)
00179 diag_holder[i*m_d.vlen+i]=m_d.vector[i];
00180
00181 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
00182 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00183 diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
00184 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00185 m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
00186 m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
00187
00188 SG_FREE(diag_holder);
00189 SG_FREE(temp_holder);
00190 }
00191 else if (m_cov_type==DIAG)
00192 {
00193 for (int32_t i=0; i<m_d.vlen; i++)
00194 cov[i*m_d.vlen+i]=m_d.vector[i];
00195 }
00196 else
00197 {
00198 for (int32_t i=0; i<m_mean.vlen; i++)
00199 cov[i*m_mean.vlen+i]=m_d.vector[0];
00200 }
00201 return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);
00202 }
00203
00204 void CGaussian::register_params()
00205 {
00206 m_parameters->add(&m_u, "m_u", "Unitary matrix.");
00207 m_parameters->add(&m_d, "m_d", "Diagonal.");
00208 m_parameters->add(&m_mean, "m_mean", "Mean.");
00209 m_parameters->add(&m_constant, "m_constant", "Constant part.");
00210 m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.");
00211 }
00212
00213 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
00214 {
00215 m_d.destroy_vector();
00216 switch (m_cov_type)
00217 {
00218 case FULL:
00219 m_u.destroy_matrix();
00220 m_u.matrix=SG_MALLOC(float64_t, cov.num_rows*cov.num_rows);
00221 memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
00222
00223 m_d.vector=CMath::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows);
00224 m_d.vlen=cov.num_rows;
00225 m_u.num_rows=cov.num_rows;
00226 m_u.num_cols=cov.num_rows;
00227 break;
00228 case DIAG:
00229 m_d.vector=SG_MALLOC(float64_t, cov.num_rows);
00230
00231 for (int32_t i=0; i<cov.num_rows; i++)
00232 m_d.vector[i]=cov.matrix[i*cov.num_rows+i];
00233
00234 m_d.vlen=cov.num_rows;
00235 break;
00236 case SPHERICAL:
00237 m_d.vector=SG_MALLOC(float64_t, 1);
00238
00239 m_d.vector[0]=cov.matrix[0];
00240 m_d.vlen=1;
00241 break;
00242 }
00243 }
00244
00245 SGVector<float64_t> CGaussian::sample()
00246 {
00247 float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00248 memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
00249
00250 switch (m_cov_type)
00251 {
00252 case FULL:
00253 case DIAG:
00254 for (int32_t i=0; i<m_mean.vlen; i++)
00255 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
00256
00257 break;
00258 case SPHERICAL:
00259 for (int32_t i=0; i<m_mean.vlen; i++)
00260 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
00261
00262 break;
00263 }
00264
00265 float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
00266
00267 for (int32_t i=0; i<m_mean.vlen; i++)
00268 random_vec[i]=CMath::randn_double();
00269
00270 if (m_cov_type==FULL)
00271 {
00272 float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00273 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00274 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00275 r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
00276 SG_FREE(r_matrix);
00277 r_matrix=temp_matrix;
00278 }
00279
00280 float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
00281
00282 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
00283 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
00284
00285 for (int32_t i=0; i<m_mean.vlen; i++)
00286 samp[i]+=m_mean.vector[i];
00287
00288 SG_FREE(random_vec);
00289 SG_FREE(r_matrix);
00290
00291 return SGVector<float64_t>(samp, m_mean.vlen, false);
00292 }
00293
00294 #endif