Gaussian.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Alesis Novik
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 #include <shogun/lib/config.h>
00011 
00012 #ifdef HAVE_LAPACK
00013 
00014 #include <shogun/distributions/Gaussian.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parameter.h>
00017 
00018 using namespace shogun;
00019 
00020 CGaussian::CGaussian() : CDistribution(), m_constant(0), m_d(), m_u(), m_mean(), m_cov_type(FULL)
00021 {
00022     register_params();
00023 }
00024 
00025 CGaussian::CGaussian(SGVector<float64_t> mean, SGMatrix<float64_t> cov,
00026                     ECovType cov_type) : CDistribution(), m_d(), m_u(), m_cov_type(cov_type)
00027 {
00028     ASSERT(mean.vlen==cov.num_rows);
00029     ASSERT(cov.num_rows==cov.num_cols);
00030 
00031     m_mean=mean;
00032 
00033     if (cov.num_rows==1)
00034         m_cov_type=SPHERICAL;
00035 
00036     decompose_cov(cov);
00037     init();
00038     register_params();
00039 
00040     if (cov.do_free)
00041         cov.free_matrix();
00042 }
00043 
00044 void CGaussian::init()
00045 {
00046     m_constant=CMath::log(2*M_PI)*m_mean.vlen;
00047     switch (m_cov_type)
00048     {
00049         case FULL:
00050         case DIAG:
00051             for (int32_t i=0; i<m_d.vlen; i++)
00052                 m_constant+=CMath::log(m_d.vector[i]);
00053             break;
00054         case SPHERICAL:
00055             m_constant+=m_mean.vlen*CMath::log(m_d.vector[0]);
00056             break;
00057     }
00058 }
00059 
00060 CGaussian::~CGaussian()
00061 {
00062     m_d.destroy_vector();
00063     m_u.destroy_matrix();
00064     m_mean.free_vector();
00065 }
00066 
00067 bool CGaussian::train(CFeatures* data)
00068 {
00069     // init features with data if necessary and assure type is correct
00070     if (data)
00071     {
00072         if (!data->has_property(FP_DOT))
00073                 SG_ERROR("Specified features are not of type CDotFeatures\n");      
00074         set_features(data);
00075     }
00076     CDotFeatures* dotdata=(CDotFeatures *) data;
00077 
00078     m_mean.destroy_vector();
00079 
00080     m_mean=dotdata->get_mean();
00081     SGMatrix<float64_t> cov=dotdata->get_cov();
00082 
00083     decompose_cov(cov);
00084     cov.destroy_matrix();
00085 
00086     init();
00087 
00088     return true;
00089 }
00090 
00091 int32_t CGaussian::get_num_model_parameters()
00092 {
00093     switch (m_cov_type)
00094     {
00095         case FULL:
00096             return m_u.num_rows*m_u.num_cols+m_d.vlen+m_mean.vlen;
00097         case DIAG:
00098             return m_d.vlen+m_mean.vlen;
00099         case SPHERICAL:
00100             return 1+m_mean.vlen;
00101     }
00102     return 0;
00103 }
00104 
00105 float64_t CGaussian::get_log_model_parameter(int32_t num_param)
00106 {
00107     SG_NOTIMPLEMENTED;
00108     return 0;
00109 }
00110 
00111 float64_t CGaussian::get_log_derivative(int32_t num_param, int32_t num_example)
00112 {
00113     SG_NOTIMPLEMENTED;
00114     return 0;
00115 }
00116 
00117 float64_t CGaussian::get_log_likelihood_example(int32_t num_example)
00118 {
00119     ASSERT(features->has_property(FP_DOT));
00120     SGVector<float64_t> v=((CDotFeatures *)features)->get_computed_dot_feature_vector(num_example);
00121     float64_t answer=compute_log_PDF(v);
00122     v.free_vector();
00123     return answer;
00124 }
00125 
00126 float64_t CGaussian::compute_log_PDF(SGVector<float64_t> point)
00127 {
00128     ASSERT(m_mean.vector && m_d.vector);
00129     ASSERT(point.vlen == m_mean.vlen);
00130     float64_t* difference=SG_MALLOC(float64_t, m_mean.vlen);
00131     memcpy(difference, point.vector, sizeof(float64_t)*m_mean.vlen);
00132 
00133     for (int32_t i = 0; i < m_mean.vlen; i++)
00134         difference[i] -= m_mean.vector[i];
00135 
00136     float64_t answer=m_constant;
00137 
00138     if (m_cov_type==FULL)
00139     {
00140         float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen);
00141         cblas_dgemv(CblasRowMajor, CblasNoTrans, m_d.vlen, m_d.vlen,
00142                     1, m_u.matrix, m_d.vlen, difference, 1, 0, temp_holder, 1);
00143 
00144         for (int32_t i=0; i<m_d.vlen; i++)
00145             answer+=temp_holder[i]*temp_holder[i]/m_d.vector[i];
00146 
00147         SG_FREE(temp_holder);
00148     }
00149     else if (m_cov_type==DIAG)
00150     {
00151         for (int32_t i=0; i<m_mean.vlen; i++)
00152             answer+=difference[i]*difference[i]/m_d.vector[i];
00153     }
00154     else
00155     {
00156         for (int32_t i=0; i<m_mean.vlen; i++)
00157             answer+=difference[i]*difference[i]/m_d.vector[0];
00158     }
00159 
00160     SG_FREE(difference);
00161 
00162     return -0.5*answer;
00163 }
00164 
00165 SGMatrix<float64_t> CGaussian::get_cov()
00166 {
00167     float64_t* cov=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00168     memset(cov, 0, sizeof(float64_t)*m_mean.vlen*m_mean.vlen);
00169 
00170     if (m_cov_type==FULL)
00171     {
00172         if (!m_u.matrix)
00173             SG_ERROR("Unitary matrix not set\n");
00174 
00175         float64_t* temp_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00176         float64_t* diag_holder=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00177         memset(diag_holder, 0, sizeof(float64_t)*m_d.vlen*m_d.vlen);
00178         for(int32_t i=0; i<m_d.vlen; i++)
00179             diag_holder[i*m_d.vlen+i]=m_d.vector[i];
00180 
00181         cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
00182                     m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00183                     diag_holder, m_d.vlen, 0, temp_holder, m_d.vlen);
00184         cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00185                     m_d.vlen, m_d.vlen, m_d.vlen, 1, temp_holder, m_d.vlen,
00186                     m_u.matrix, m_d.vlen, 0, cov, m_d.vlen);
00187 
00188         SG_FREE(diag_holder);
00189         SG_FREE(temp_holder);
00190     }
00191     else if (m_cov_type==DIAG)
00192     {
00193         for (int32_t i=0; i<m_d.vlen; i++)
00194             cov[i*m_d.vlen+i]=m_d.vector[i];
00195     }
00196     else
00197     {
00198         for (int32_t i=0; i<m_mean.vlen; i++)
00199             cov[i*m_mean.vlen+i]=m_d.vector[0];
00200     }
00201     return SGMatrix<float64_t>(cov, m_mean.vlen, m_mean.vlen, false);//fix needed
00202 }
00203 
00204 void CGaussian::register_params()
00205 {
00206     m_parameters->add(&m_u, "m_u", "Unitary matrix.");
00207     m_parameters->add(&m_d, "m_d", "Diagonal.");
00208     m_parameters->add(&m_mean, "m_mean", "Mean.");
00209     m_parameters->add(&m_constant, "m_constant", "Constant part.");
00210     m_parameters->add((machine_int_t*)&m_cov_type, "m_cov_type", "Covariance type.");
00211 }
00212 
00213 void CGaussian::decompose_cov(SGMatrix<float64_t> cov)
00214 {
00215     m_d.destroy_vector();
00216     switch (m_cov_type)
00217     {
00218         case FULL:
00219             m_u.destroy_matrix();
00220             m_u.matrix=SG_MALLOC(float64_t, cov.num_rows*cov.num_rows);
00221             memcpy(m_u.matrix, cov.matrix, sizeof(float64_t)*cov.num_rows*cov.num_rows);
00222 
00223             m_d.vector=CMath::compute_eigenvectors(m_u.matrix, cov.num_rows, cov.num_rows);
00224             m_d.vlen=cov.num_rows;
00225             m_u.num_rows=cov.num_rows;
00226             m_u.num_cols=cov.num_rows;
00227             break;
00228         case DIAG:
00229             m_d.vector=SG_MALLOC(float64_t, cov.num_rows);
00230 
00231             for (int32_t i=0; i<cov.num_rows; i++)
00232                 m_d.vector[i]=cov.matrix[i*cov.num_rows+i];
00233             
00234             m_d.vlen=cov.num_rows;
00235             break;
00236         case SPHERICAL:
00237             m_d.vector=SG_MALLOC(float64_t, 1);
00238 
00239             m_d.vector[0]=cov.matrix[0];
00240             m_d.vlen=1;
00241             break;
00242     }
00243 }
00244 
00245 SGVector<float64_t> CGaussian::sample()
00246 {
00247     float64_t* r_matrix=SG_MALLOC(float64_t, m_mean.vlen*m_mean.vlen);
00248     memset(r_matrix, 0, m_mean.vlen*m_mean.vlen*sizeof(float64_t));
00249 
00250     switch (m_cov_type)
00251     {
00252         case FULL:
00253         case DIAG:
00254             for (int32_t i=0; i<m_mean.vlen; i++)
00255                 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[i]);
00256 
00257             break;
00258         case SPHERICAL:
00259             for (int32_t i=0; i<m_mean.vlen; i++)
00260                 r_matrix[i*m_mean.vlen+i]=CMath::sqrt(m_d.vector[0]);
00261 
00262             break;
00263     }
00264 
00265     float64_t* random_vec=SG_MALLOC(float64_t, m_mean.vlen);
00266 
00267     for (int32_t i=0; i<m_mean.vlen; i++)
00268         random_vec[i]=CMath::randn_double();
00269 
00270     if (m_cov_type==FULL)
00271     {
00272         float64_t* temp_matrix=SG_MALLOC(float64_t, m_d.vlen*m_d.vlen);
00273         cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
00274                     m_d.vlen, m_d.vlen, m_d.vlen, 1, m_u.matrix, m_d.vlen,
00275                     r_matrix, m_d.vlen, 0, temp_matrix, m_d.vlen);
00276         SG_FREE(r_matrix);
00277         r_matrix=temp_matrix;
00278     }
00279     
00280     float64_t* samp=SG_MALLOC(float64_t, m_mean.vlen);
00281 
00282     cblas_dgemv(CblasRowMajor, CblasNoTrans, m_mean.vlen, m_mean.vlen,
00283                 1, r_matrix, m_mean.vlen, random_vec, 1, 0, samp, 1);
00284 
00285     for (int32_t i=0; i<m_mean.vlen; i++)
00286         samp[i]+=m_mean.vector[i];
00287 
00288     SG_FREE(random_vec);
00289     SG_FREE(r_matrix);
00290 
00291     return SGVector<float64_t>(samp, m_mean.vlen, false);//fix needed
00292 }
00293 
00294 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation