00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/KernelPCA.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/config.h>
00014 #include <shogun/mathematics/Math.h>
00015
00016 #include <string.h>
00017 #include <stdlib.h>
00018
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/lib/common.h>
00021 #include <shogun/kernel/Kernel.h>
00022 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00023 #include <shogun/features/Features.h>
00024 #include <shogun/features/SimpleFeatures.h>
00025 #include <shogun/io/SGIO.h>
00026
00027 using namespace shogun;
00028
00029 CKernelPCA::CKernelPCA() : CDimensionReductionPreprocessor()
00030 {
00031 init();
00032 }
00033
00034 CKernelPCA::CKernelPCA(CKernel* k) : CDimensionReductionPreprocessor()
00035 {
00036 init();
00037 SG_REF(k);
00038 m_kernel=k;
00039 }
00040
00041 void CKernelPCA::init()
00042 {
00043 m_kernel = NULL;
00044 m_initialized = false;
00045 m_init_features = NULL;
00046 m_transformation_matrix = SGMatrix<float64_t>(NULL, 0, 0, false);
00047 m_bias_vector = SGVector<float64_t>(NULL, 0, false);
00048
00049 m_parameters->add(&m_transformation_matrix, "transformation matrix",
00050 "matrix used to transform data");
00051 m_parameters->add(&m_bias_vector, "bias vector",
00052 "bias vector used to transform data");
00053 }
00054
00055 void CKernelPCA::cleanup()
00056 {
00057 m_transformation_matrix.destroy_matrix();
00058 m_bias_vector.destroy_vector();
00059 if (m_kernel) SG_UNREF(m_kernel);
00060 if (m_init_features) SG_UNREF(m_init_features);
00061 m_initialized = false;
00062 }
00063
00064 CKernelPCA::~CKernelPCA()
00065 {
00066 m_transformation_matrix.destroy_matrix();
00067 m_bias_vector.destroy_vector();
00068 if (m_kernel) SG_UNREF(m_kernel);
00069 if (m_init_features) SG_UNREF(m_init_features);
00070 }
00071
00072 bool CKernelPCA::init(CFeatures* features)
00073 {
00074 if (!m_initialized && m_kernel)
00075 {
00076 SG_REF(features);
00077 m_init_features = features;
00078
00079 m_kernel->init(features,features);
00080 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
00081 m_kernel->cleanup();
00082 int32_t n = kernel_matrix.num_cols;
00083 int32_t m = kernel_matrix.num_rows;
00084 ASSERT(n==m);
00085
00086 float64_t* bias_tmp = CMath::get_column_sum(kernel_matrix.matrix, n,n);
00087 CMath::scale_vector(-1.0/n, bias_tmp, n);
00088 float64_t s = CMath::sum(bias_tmp, n)/n;
00089 CMath::add_scalar(-s, bias_tmp, n);
00090
00091 CMath::center_matrix(kernel_matrix.matrix, n, m);
00092
00093 float64_t* eigenvalues=CMath::compute_eigenvectors(kernel_matrix.matrix, n, n);
00094
00095 for (int32_t i=0; i<n; i++)
00096 {
00097
00098 for (int32_t j=0; j<n; j++)
00099 kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i]));
00100 }
00101
00102 SG_FREE(eigenvalues);
00103
00104 m_transformation_matrix.destroy_matrix();
00105 m_bias_vector.destroy_vector();
00106
00107 m_transformation_matrix = SGMatrix<float64_t>(kernel_matrix.matrix,n,n);
00108 m_bias_vector = SGVector<float64_t>(n);
00109 CMath::fill_vector(m_bias_vector.vector, m_bias_vector.vlen, 0.0);
00110
00111 CMath::dgemv(1.0, m_transformation_matrix.matrix, n, n,
00112 CblasTrans, bias_tmp, 0.0, m_bias_vector.vector);
00113
00114 float64_t* rowsum = CMath::get_row_sum(m_transformation_matrix.matrix, n, n);
00115 CMath::scale_vector(1.0/n, rowsum, n);
00116
00117 for (int32_t i=0; i<n; i++)
00118 {
00119 for (int32_t j=0; j<n; j++)
00120 m_transformation_matrix.matrix[j+n*i] -= rowsum[i];
00121 }
00122 SG_FREE(rowsum);
00123 SG_FREE(bias_tmp);
00124
00125 m_initialized=true;
00126 SG_INFO("Done\n");
00127 return true;
00128 }
00129 return false;
00130 }
00131
00132
00133 SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features)
00134 {
00135 ASSERT(m_initialized);
00136 CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*)features;
00137
00138 int32_t num_vectors = simple_features->get_num_vectors();
00139 int32_t i,j,k;
00140 int32_t n = m_transformation_matrix.num_cols;
00141
00142 m_kernel->init(features,m_init_features);
00143
00144 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
00145
00146 for (i=0; i<num_vectors; i++)
00147 {
00148 for (j=0; j<m_target_dim; j++)
00149 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
00150
00151 for (j=0; j<n; j++)
00152 {
00153 float64_t kij = m_kernel->kernel(i,j);
00154
00155 for (k=0; k<m_target_dim; k++)
00156 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
00157 }
00158 }
00159
00160 m_kernel->cleanup();
00161 simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
00162 return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix();
00163 }
00164
00165 SGVector<float64_t> CKernelPCA::apply_to_feature_vector(SGVector<float64_t> vector)
00166 {
00167 ASSERT(m_initialized);
00168 SGVector<float64_t> result = SGVector<float64_t>(m_target_dim);
00169 m_kernel->init(new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(vector.vector,vector.vlen,1)),
00170 m_init_features);
00171
00172 int32_t j,k;
00173 int32_t n = m_transformation_matrix.num_cols;
00174
00175 for (j=0; j<m_target_dim; j++)
00176 result.vector[j] = m_bias_vector.vector[j];
00177
00178 for (j=0; j<n; j++)
00179 {
00180 float64_t kj = m_kernel->kernel(0,j);
00181
00182 for (k=0; k<m_target_dim; k++)
00183 result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j];
00184 }
00185
00186 m_kernel->cleanup();
00187 return result;
00188 }
00189
00190 CSimpleFeatures<float64_t>* CKernelPCA::apply_to_string_features(CFeatures* features)
00191 {
00192 ASSERT(m_initialized);
00193
00194 int32_t num_vectors = features->get_num_vectors();
00195 int32_t i,j,k;
00196 int32_t n = m_transformation_matrix.num_cols;
00197
00198 m_kernel->init(features,m_init_features);
00199
00200 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
00201
00202 for (i=0; i<num_vectors; i++)
00203 {
00204 for (j=0; j<m_target_dim; j++)
00205 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
00206
00207 for (j=0; j<n; j++)
00208 {
00209 float64_t kij = m_kernel->kernel(i,j);
00210
00211 for (k=0; k<m_target_dim; k++)
00212 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
00213 }
00214 }
00215
00216 m_kernel->cleanup();
00217
00218 return new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
00219 }
00220
00221 #endif