00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/preprocessor/KernelPCA.h>
00012 #ifdef HAVE_LAPACK
00013 #include <shogun/lib/config.h>
00014 #include <shogun/mathematics/Math.h>
00015
00016 #include <string.h>
00017 #include <stdlib.h>
00018
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/lib/common.h>
00021 #include <shogun/kernel/Kernel.h>
00022 #include <shogun/preprocessor/DimensionReductionPreprocessor.h>
00023 #include <shogun/features/Features.h>
00024 #include <shogun/features/SimpleFeatures.h>
00025 #include <shogun/io/SGIO.h>
00026
00027 using namespace shogun;
00028
00029 CKernelPCA::CKernelPCA() : CDimensionReductionPreprocessor()
00030 {
00031 init();
00032 }
00033
00034 CKernelPCA::CKernelPCA(CKernel* k) : CDimensionReductionPreprocessor()
00035 {
00036 init();
00037 set_kernel(k);
00038 }
00039
00040 void CKernelPCA::init()
00041 {
00042 m_initialized = false;
00043 m_init_features = NULL;
00044 m_transformation_matrix = SGMatrix<float64_t>(NULL, 0, 0, false);
00045 m_bias_vector = SGVector<float64_t>(NULL, 0, false);
00046
00047 m_parameters->add(&m_transformation_matrix, "transformation matrix",
00048 "matrix used to transform data");
00049 m_parameters->add(&m_bias_vector, "bias vector",
00050 "bias vector used to transform data");
00051 }
00052
00053 void CKernelPCA::cleanup()
00054 {
00055 m_transformation_matrix.destroy_matrix();
00056 m_bias_vector.destroy_vector();
00057 if (m_init_features) SG_UNREF(m_init_features);
00058 m_initialized = false;
00059 }
00060
00061 CKernelPCA::~CKernelPCA()
00062 {
00063 m_transformation_matrix.destroy_matrix();
00064 m_bias_vector.destroy_vector();
00065 if (m_init_features) SG_UNREF(m_init_features);
00066 }
00067
00068 bool CKernelPCA::init(CFeatures* features)
00069 {
00070 if (!m_initialized && m_kernel)
00071 {
00072 SG_REF(features);
00073 m_init_features = features;
00074
00075 m_kernel->init(features,features);
00076 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
00077 m_kernel->cleanup();
00078 int32_t n = kernel_matrix.num_cols;
00079 int32_t m = kernel_matrix.num_rows;
00080 ASSERT(n==m);
00081
00082 float64_t* bias_tmp = CMath::get_column_sum(kernel_matrix.matrix, n,n);
00083 CMath::scale_vector(-1.0/n, bias_tmp, n);
00084 float64_t s = CMath::sum(bias_tmp, n)/n;
00085 CMath::add_scalar(-s, bias_tmp, n);
00086
00087 CMath::center_matrix(kernel_matrix.matrix, n, m);
00088
00089 float64_t* eigenvalues=CMath::compute_eigenvectors(kernel_matrix.matrix, n, n);
00090
00091 for (int32_t i=0; i<n; i++)
00092 {
00093
00094 for (int32_t j=0; j<n; j++)
00095 kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i]));
00096 }
00097
00098 SG_FREE(eigenvalues);
00099
00100 m_transformation_matrix.destroy_matrix();
00101 m_bias_vector.destroy_vector();
00102
00103 m_transformation_matrix = SGMatrix<float64_t>(kernel_matrix.matrix,n,n);
00104 m_bias_vector = SGVector<float64_t>(n);
00105 CMath::fill_vector(m_bias_vector.vector, m_bias_vector.vlen, 0.0);
00106
00107 CMath::dgemv(1.0, m_transformation_matrix.matrix, n, n,
00108 CblasTrans, bias_tmp, 0.0, m_bias_vector.vector);
00109
00110 float64_t* rowsum = CMath::get_row_sum(m_transformation_matrix.matrix, n, n);
00111 CMath::scale_vector(1.0/n, rowsum, n);
00112
00113 for (int32_t i=0; i<n; i++)
00114 {
00115 for (int32_t j=0; j<n; j++)
00116 m_transformation_matrix.matrix[j+n*i] -= rowsum[i];
00117 }
00118 SG_FREE(rowsum);
00119 SG_FREE(bias_tmp);
00120
00121 m_initialized=true;
00122 SG_INFO("Done\n");
00123 return true;
00124 }
00125 return false;
00126 }
00127
00128
00129 SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features)
00130 {
00131 ASSERT(m_initialized);
00132 CSimpleFeatures<float64_t>* simple_features = (CSimpleFeatures<float64_t>*)features;
00133
00134 int32_t num_vectors = simple_features->get_num_vectors();
00135 int32_t i,j,k;
00136 int32_t n = m_transformation_matrix.num_cols;
00137
00138 m_kernel->init(features,m_init_features);
00139
00140 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
00141
00142 for (i=0; i<num_vectors; i++)
00143 {
00144 for (j=0; j<m_target_dim; j++)
00145 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
00146
00147 for (j=0; j<n; j++)
00148 {
00149 float64_t kij = m_kernel->kernel(i,j);
00150
00151 for (k=0; k<m_target_dim; k++)
00152 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
00153 }
00154 }
00155
00156 m_kernel->cleanup();
00157 simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
00158 return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix();
00159 }
00160
00161 SGVector<float64_t> CKernelPCA::apply_to_feature_vector(SGVector<float64_t> vector)
00162 {
00163 ASSERT(m_initialized);
00164 SGVector<float64_t> result = SGVector<float64_t>(m_target_dim);
00165 m_kernel->init(new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(vector.vector,vector.vlen,1)),
00166 m_init_features);
00167
00168 int32_t j,k;
00169 int32_t n = m_transformation_matrix.num_cols;
00170
00171 for (j=0; j<m_target_dim; j++)
00172 result.vector[j] = m_bias_vector.vector[j];
00173
00174 for (j=0; j<n; j++)
00175 {
00176 float64_t kj = m_kernel->kernel(0,j);
00177
00178 for (k=0; k<m_target_dim; k++)
00179 result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j];
00180 }
00181
00182 m_kernel->cleanup();
00183 return result;
00184 }
00185
00186 CSimpleFeatures<float64_t>* CKernelPCA::apply_to_string_features(CFeatures* features)
00187 {
00188 ASSERT(m_initialized);
00189
00190 int32_t num_vectors = features->get_num_vectors();
00191 int32_t i,j,k;
00192 int32_t n = m_transformation_matrix.num_cols;
00193
00194 m_kernel->init(features,m_init_features);
00195
00196 float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
00197
00198 for (i=0; i<num_vectors; i++)
00199 {
00200 for (j=0; j<m_target_dim; j++)
00201 new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
00202
00203 for (j=0; j<n; j++)
00204 {
00205 float64_t kij = m_kernel->kernel(i,j);
00206
00207 for (k=0; k<m_target_dim; k++)
00208 new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
00209 }
00210 }
00211
00212 m_kernel->cleanup();
00213
00214 return new CSimpleFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
00215 }
00216
00217 #endif