00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/KernelPCA.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/config.h>
00014 #include <shogun/mathematics/Math.h>
00015
00016 #include <string.h>
00017 #include <stdlib.h>
00018
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/lib/common.h>
00021 #include <shogun/kernel/Kernel.h>
00022 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00023 #include <shogun/features/Features.h>
00024 #include <shogun/io/SGIO.h>
00025
00026 using namespace shogun;
00027
/** Default constructor. No kernel is set; set_kernel() must be called
 * before init(CFeatures*) can train the transformation. */
CKernelPCA::CKernelPCA() : CDimensionReductionPreprocessor()
{
	init();
}
00032
/** Constructor with kernel.
 *
 * @param k kernel used to build the kernel matrix during training
 *          (reference handling is done by set_kernel())
 */
CKernelPCA::CKernelPCA(CKernel* k) : CDimensionReductionPreprocessor()
{
	init();
	set_kernel(k);
}
00038
/** Shared constructor initialization: resets the trained state and
 * registers parameters for serialization / model selection. */
void CKernelPCA::init()
{
	// not trained yet; no training features retained
	m_initialized = false;
	m_init_features = NULL;
	// empty, non-ref-counted placeholders until init(CFeatures*) fills them
	m_transformation_matrix = SGMatrix<float64_t>(NULL, 0, 0, false);
	m_bias_vector = SGVector<float64_t>(NULL, 0, false);

	SG_ADD(&m_transformation_matrix, "transformation matrix",
	"matrix used to transform data", MS_NOT_AVAILABLE);
	SG_ADD(&m_bias_vector, "bias vector",
	"bias vector used to transform data", MS_NOT_AVAILABLE);
}
00051
00052 void CKernelPCA::cleanup()
00053 {
00054 m_transformation_matrix = SGMatrix<float64_t>();
00055
00056 if (m_init_features)
00057 SG_UNREF(m_init_features);
00058
00059 m_initialized = false;
00060 }
00061
/** Destructor. Releases the reference on the training features taken in
 * init(CFeatures*); the SGMatrix/SGVector members free their own storage. */
CKernelPCA::~CKernelPCA()
{
	if (m_init_features)
		SG_UNREF(m_init_features);
}
00067
/** Trains the kernel PCA transformation.
 *
 * Builds the kernel matrix on @p features, double-centers it, performs an
 * eigendecomposition, and stores a whitened projection matrix in
 * m_transformation_matrix plus a per-component centering bias in
 * m_bias_vector for projecting unseen data.
 *
 * @param features training features; a reference is held until cleanup()
 * @return true if training ran, false if already initialized or no kernel set
 */
bool CKernelPCA::init(CFeatures* features)
{
	if (!m_initialized && m_kernel)
	{
		SG_REF(features);
		m_init_features = features;

		m_kernel->init(features,features);
		SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
		m_kernel->cleanup();
		int32_t n = kernel_matrix.num_cols;
		int32_t m = kernel_matrix.num_rows;
		ASSERT(n==m);

		// bias_tmp = -(column sums)/n minus the grand mean of those values;
		// used below to form the centering bias for projections of new data
		float64_t* bias_tmp = SGMatrix<float64_t>::get_column_sum(kernel_matrix.matrix, n,n);
		SGVector<float64_t>::scale_vector(-1.0/n, bias_tmp, n);
		float64_t s = SGVector<float64_t>::sum(bias_tmp, n)/n;
		SGVector<float64_t>::add_scalar(-s, bias_tmp, n);

		// double centering of the kernel matrix (subtract row/column means,
		// add back the grand mean)
		SGMatrix<float64_t>::center_matrix(kernel_matrix.matrix, n, m);

		// eigenvectors overwrite kernel_matrix.matrix in place; LAPACK-style
		// solvers return eigenvalues in ascending order, which is why the
		// apply_* methods read components via the (n-k-1) column index —
		// TODO confirm against compute_eigenvectors' documented ordering
		float64_t* eigenvalues=SGMatrix<float64_t>::compute_eigenvectors(kernel_matrix.matrix, n, n);

		for (int32_t i=0; i<n; i++)
		{
			// scale eigenvector i by 1/sqrt(lambda_i) (whitening); the 1e-16
			// floor guards against division by (near-)zero eigenvalues.
			// i*n+j addresses column i in column-major storage, consistent
			// with the CblasColMajor dgemv call below.
			for (int32_t j=0; j<n; j++)
				kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i]));
		}

		SG_FREE(eigenvalues);

		// take ownership of the eigenvector storage; NULLing the source
		// pointer prevents kernel_matrix from freeing it on destruction
		m_transformation_matrix = SGMatrix<float64_t>(kernel_matrix.matrix,n,n);
		kernel_matrix.matrix = NULL;
		m_bias_vector = SGVector<float64_t>(n);
		SGVector<float64_t>::fill_vector(m_bias_vector.vector, m_bias_vector.vlen, 0.0);

		// m_bias_vector = E^T * bias_tmp, E = whitened eigenvector matrix
		cblas_dgemv(CblasColMajor, CblasTrans,
			n, n, 1.0, m_transformation_matrix.matrix, n,
			bias_tmp, 1, 0.0, m_bias_vector.vector, 1);

		// subtract per-index means from the transformation matrix so that
		// projected training data is centered — NOTE(review): the row/column
		// convention of get_row_sum vs the j+n*i write below should be
		// verified against SGMatrix's storage order
		float64_t* rowsum = SGMatrix<float64_t>::get_row_sum(m_transformation_matrix.matrix, n, n);
		SGVector<float64_t>::scale_vector(1.0/n, rowsum, n);

		for (int32_t i=0; i<n; i++)
		{
			for (int32_t j=0; j<n; j++)
				m_transformation_matrix.matrix[j+n*i] -= rowsum[i];
		}
		SG_FREE(rowsum);
		SG_FREE(bias_tmp);

		m_initialized=true;
		SG_INFO("Done\n");
		return true;
	}
	return false;
}
00126
00127
/** Projects dense features into the trained kernel PCA space, replacing
 * the features' matrix in place.
 *
 * @param features must actually be CDenseFeatures<float64_t> —
 *        NOTE(review): the cast is unchecked; a wrong type is UB here
 * @return the new m_target_dim x num_vectors feature matrix
 */
SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features)
{
	ASSERT(m_initialized);
	CDenseFeatures<float64_t>* simple_features = (CDenseFeatures<float64_t>*)features;

	int32_t num_vectors = simple_features->get_num_vectors();
	int32_t i,j,k;
	int32_t n = m_transformation_matrix.num_cols;

	// kernel between the new data (lhs) and the stored training data (rhs)
	m_kernel->init(features,m_init_features);

	// column-major output: column i holds the projection of vector i
	float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);

	for (i=0; i<num_vectors; i++)
	{
		// start each projection from the precomputed centering bias
		for (j=0; j<m_target_dim; j++)
			new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];

		for (j=0; j<n; j++)
		{
			float64_t kij = m_kernel->kernel(i,j);

			// (n-k-1)*n+j: component k reads column n-k-1 — the leading
			// eigenvectors sit in the last columns (ascending eigenvalue
			// order from init) — TODO confirm ordering
			for (k=0; k<m_target_dim; k++)
				new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
		}
	}

	m_kernel->cleanup();
	// hand the buffer to the features object; it takes ownership
	simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
	return ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
}
00159
/** Projects a single dense vector into the trained kernel PCA space.
 *
 * @param vector input vector; its data is wrapped (not copied) into a
 *        temporary single-column feature object for the kernel
 * @return projection of length m_target_dim
 *
 * NOTE(review): the temporary CDenseFeatures created here is only
 * referenced through the kernel; whether m_kernel->cleanup() releases it
 * (or it leaks) depends on kernel refcounting — verify. Also verify the
 * wrapping SGMatrix does not free vector's data on destruction.
 */
SGVector<float64_t> CKernelPCA::apply_to_feature_vector(SGVector<float64_t> vector)
{
	ASSERT(m_initialized);
	SGVector<float64_t> result = SGVector<float64_t>(m_target_dim);
	// lhs = the single new vector, rhs = stored training features
	m_kernel->init(new CDenseFeatures<float64_t>(SGMatrix<float64_t>(vector.vector,vector.vlen,1)),
		m_init_features);

	int32_t j,k;
	int32_t n = m_transformation_matrix.num_cols;

	// start from the precomputed centering bias
	for (j=0; j<m_target_dim; j++)
		result.vector[j] = m_bias_vector.vector[j];

	for (j=0; j<n; j++)
	{
		float64_t kj = m_kernel->kernel(0,j);

		// component k reads column n-k-1 (leading eigenvectors are the
		// last columns; see init) — TODO confirm ordering
		for (k=0; k<m_target_dim; k++)
			result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j];
	}

	m_kernel->cleanup();
	return result;
}
00184
00185 CDenseFeatures<float64_t>* CKernelPCA::apply_to_string_features(CFeatures* features)
00186 {
00187 ASSERT(m_initialized);
00188
00189 int32_t num_vectors = features->get_num_vectors();
00190 int32_t i,j,k;
00191 int32_t n = m_transformation_matrix.num_cols;
00192
00193 m_kernel->init(features,m_init_features);
00194
00195 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
00196
00197 for (i=0; i<num_vectors; i++)
00198 {
00199 for (j=0; j<m_target_dim; j++)
00200 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
00201
00202 for (j=0; j<n; j++)
00203 {
00204 float64_t kij = m_kernel->kernel(i,j);
00205
00206 for (k=0; k<m_target_dim; k++)
00207 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
00208 }
00209 }
00210
00211 m_kernel->cleanup();
00212
00213 return new CDenseFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
00214 }
00215
00216 #endif