00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/lib/config.h>
00011
00012 #ifdef HAVE_LAPACK
00013
00014 #include <shogun/distributions/Gaussian.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parameter.h>
00017
00018 using namespace shogun;
00019
00020 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
00021 {
00022 register_params();
00023 }
00024
00025 CGaussian::CGaussian(const SGVector<float64_t> mean, SGMatrix<float64_t> cov,
00026 ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type)
00027 {
00028 ASSERT(mean.vlen==cov.num_rows);
00029 ASSERT(cov.num_rows==cov.num_cols);
00030
00031 m_mean=mean;
00032
00033 if (cov.num_rows==1)
00034 m_cov_type=SPHERICAL;
00035
00036 decompose_cov(cov);
00037 init();
00038 register_params();
00039 }
00040
00041 void CGaussian::init()
00042 {
00043 m_constant=CMath::log(2*M_PI)*m_mean.vlen;
00044 switch (m_cov_type)
00045 {
00046 case FULL:
00047 case DIAG:
00048 for (int32_t i=0; i<m_d.vlen; i++)
00049 m_constant+=CMath::log(m_d.vector[i]);
00050 break;
00051 case SPHERICAL:
00052 m_constant+=m_mean.vlen*CMath::log(m_d.vector[0]);
00053 break;
00054 }
00055 }
00056
00057 CGaussian::~CGaussian()
00058 {
00059 }
00060
00061 bool CGaussian::train(CFeatures* data)
00062 {
00063
00064 if (data)
00065 {
00066 if (!data->has_property(FP_DOT))
00067 SG_ERROR("Specified features are not of type CDotFeatures\n");
00068 set_features(data);
00069 }
00070
00071 CDotFeatures* dotdata=(CDotFeatures *) data;
00072 m_mean=dotdata->get_mean();
00073 SGMatrix<float64_t> cov=dotdata->get_cov();
00074 decompose_cov(cov);
00075 init();
00076 return true;
00077 }
00078
00079 int32_t CGaussian::get_num_model_parameters()
00080 {
00081 switch (m_cov_type)
00082 {
00083 case FULL:
00084 return m_u.num_rows*m_u.num_cols+m_d.vlen+m_mean.vlen;
00085 case DIAG:
00086 return m_d.vlen+m_mean.vlen;
00087 case SPHERICAL:
00088 return 1+m_mean.vlen;
00089 }
00090 return 0;
00091 }
00092
00093 float64_t CGaussian::get_log_model_parameter(int32_t num_param)
00094 {
00095 SG_NOTIMPLEMENTED;
00096 return 0;
00097 }
00098
00099 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
00100 {
00101 SG_NOTIMPLEMENTED;
00102 return 0;
00103 }
00104
00105 float64_t CGaussian::get_log_likelihood_example(int32_t num_example)
00106 {
00107 ASSERT(features->has_property(FP_DOT));
00108 SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
00109 float64_t answer=compute_log_PDF(v);
00110 return answer;
00111 }
00112
00113 float64_t CGaussian::compute_log_PDF(SGVector<float64_t> point)
00114 {
00115 ASSERT(m_mean.vector && m_d.vector);
00116 ASSERT(point.vlen == m_mean.vlen);
00117 float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
00118 memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
00119
00120 for (int32_t i = 0; i < m_mean.vlen; i++)
00121 difference[i] -= m_mean.vector[i];
00122
00123 float64_t answer=m_constant;
00124
00125 if (m_cov_type==FULL)
00126 {
00127 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
00128 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
00129 1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
00130
00131 for (int32_t i=0; i<m_d.vlen; i++)
00132 answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
00133
00134 SG_FREE(temp_holder);
00135 }
00136 else if (m_cov_type==DIAG)
00137 {
00138 for (int32_t i=0; i<m_mean.vlen; i++)
00139 answer+=difference[i]*difference[i]/m_d.vector[i];
00140 }
00141 else
00142 {
00143 for (int32_t i=0; i<m_mean.vlen; i++)
00144 answer+=difference[i]*difference[i]/m_d.vector[0];
00145 }
00146
00147 SG_FREE(difference);
00148
00149 return -0.5*answer;
00150 }
00151
00152 SGVector<float64_t> CGaussian::get_mean()
00153 {
00154 return m_mean;
00155 }
00156
00157 void CGaussian::set_mean(SGVector<float64_t> mean)
00158 {
00159 if (mean.vlen==1)
00160 m_cov_type=SPHERICAL;
00161
00162 m_mean=mean;
00163 }
00164
00165 void CGaussian::set_cov(SGMatrix<float64_t> cov)
00166 {
00167 ASSERT(cov.num_rows==cov.num_cols);
00168 ASSERT(cov.num_rows==m_mean.vlen);
00169 decompose_cov(cov);
00170 init();
00171 }
00172
00173 void CGaussian::set_d(const SGVector<float64_t> d)
00174 {
00175 m_d = d;
00176 init();
00177 }
00178
00179 SGMatrix<float64_t> CGaussian::get_cov()
00180 {
00181 float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00182 memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
00183
00184 if (m_cov_type==FULL)
00185 {
00186 if (!m_u.matrix)
00187 SG_ERROR("Unitary matrix not set\n");
00188
00189 float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00190 float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00191 memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
00192 for(int32_t i=0; i<m_d.vlen; i++)
00193 diag_holder[i*m_d.vlen+i]=m_d.vector[i];
00194
00195 cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
00196 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00197 diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
00198 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00199 m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
00200 m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
00201
00202 SG_FREE(diag_holder);
00203 SG_FREE(temp_holder);
00204 }
00205 else if (m_cov_type==DIAG)
00206 {
00207 for (int32_t i=0; i<m_d.vlen; i++)
00208 cov[i*m_d.vlen+i]=m_d.vector[i];
00209 }
00210 else
00211 {
00212 for (int32_t i=0; i<m_mean.vlen; i++)
00213 cov[i*m_mean.vlen+i]=m_d.vector[0];
00214 }
00215 return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);
00216 }
00217
00218 void CGaussian::register_params()
00219 {
00220 m_parameters->add(&m_u, "m_u", "Unitary matrix.");
00221 m_parameters->add(&m_d, "m_d", "Diagonal.");
00222 m_parameters->add(&m_mean, "m_mean", "Mean.");
00223 m_parameters->add(&m_constant, "m_constant", "Constant part.");
00224 m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.");
00225 }
00226
00227 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
00228 {
00229 switch (m_cov_type)
00230 {
00231 case FULL:
00232 m_u=SGMatrix<float64_t>(cov.num_rows,cov.num_rows);
00233 memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
00234
00235 m_d.vector=SGMatrix<float64_t>::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows);
00236 m_d.vlen=cov.num_rows;
00237 m_u.num_rows=cov.num_rows;
00238 m_u.num_cols=cov.num_rows;
00239 break;
00240 case DIAG:
00241 m_d.vector=SG_MALLOC(float64_t, cov.num_rows);
00242
00243 for (int32_t i=0; i<cov.num_rows; i++)
00244 m_d.vector[i]=cov.matrix[i*cov.num_rows+i];
00245
00246 m_d.vlen=cov.num_rows;
00247 break;
00248 case SPHERICAL:
00249 m_d.vector=SG_MALLOC(float64_t, 1);
00250
00251 m_d.vector[0]=cov.matrix[0];
00252 m_d.vlen=1;
00253 break;
00254 }
00255 }
00256
00257 SGVector<float64_t> CGaussian::sample()
00258 {
00259 float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00260 memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
00261
00262 switch (m_cov_type)
00263 {
00264 case FULL:
00265 case DIAG:
00266 for (int32_t i=0; i<m_mean.vlen; i++)
00267 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
00268
00269 break;
00270 case SPHERICAL:
00271 for (int32_t i=0; i<m_mean.vlen; i++)
00272 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
00273
00274 break;
00275 }
00276
00277 float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
00278
00279 for (int32_t i=0; i<m_mean.vlen; i++)
00280 random_vec[i]=CMath::randn_double();
00281
00282 if (m_cov_type==FULL)
00283 {
00284 float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00285 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00286 m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00287 r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
00288 SG_FREE(r_matrix);
00289 r_matrix=temp_matrix;
00290 }
00291
00292 float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
00293
00294 cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
00295 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
00296
00297 for (int32_t i=0; i<m_mean.vlen; i++)
00298 samp[i]+=m_mean.vector[i];
00299
00300 SG_FREE(random_vec);
00301 SG_FREE(r_matrix);
00302
00303 return SGVector<float64_t>(samp, m_mean.vlen, false);
00304 }
00305
00306 #endif