SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KernelPCA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Soeren Sonnenburg
8  * Copyright (C) 2011 Berlin Institute of Technology
9  */
10 
12 #ifdef HAVE_LAPACK
13 #include <shogun/lib/config.h>
15 
16 #include <string.h>
17 #include <stdlib.h>
18 
20 #include <shogun/lib/common.h>
21 #include <shogun/kernel/Kernel.h>
24 #include <shogun/io/SGIO.h>
25 
26 using namespace shogun;
27 
29 {
30  init();
31 }
32 
34 {
35  init();
36  set_kernel(k);
37 }
38 
40 {
41  m_initialized = false;
42  m_init_features = NULL;
45 
46  SG_ADD(&m_transformation_matrix, "transformation_matrix",
47  "matrix used to transform data", MS_NOT_AVAILABLE);
48  SG_ADD(&m_bias_vector, "bias_vector",
49  "bias vector used to transform data", MS_NOT_AVAILABLE);
50 }
51 
53 {
56 
57  if (m_init_features)
59 
60  m_initialized = false;
61 }
62 
64 {
65  if (m_init_features)
67 }
68 
69 bool CKernelPCA::init(CFeatures* features)
70 {
71  if (!m_initialized && m_kernel)
72  {
73  SG_REF(features);
74  m_init_features = features;
75 
76  m_kernel->init(features,features);
78  m_kernel->cleanup();
79  int32_t n = kernel_matrix.num_cols;
80  int32_t m = kernel_matrix.num_rows;
81  ASSERT(n==m)
82 
83  float64_t* bias_tmp = SGMatrix<float64_t>::get_column_sum(kernel_matrix.matrix, n,n);
84  SGVector<float64_t>::scale_vector(-1.0/n, bias_tmp, n);
85  float64_t s = SGVector<float64_t>::sum(bias_tmp, n)/n;
86  SGVector<float64_t>::add_scalar(-s, bias_tmp, n);
87 
88  SGMatrix<float64_t>::center_matrix(kernel_matrix.matrix, n, m);
89 
90  float64_t* eigenvalues=SGMatrix<float64_t>::compute_eigenvectors(kernel_matrix.matrix, n, n);
91 
92  for (int32_t i=0; i<n; i++)
93  {
94  //normalize and trap divide by zero and negative eigenvalues
95  for (int32_t j=0; j<n; j++)
96  kernel_matrix.matrix[i*n+j]/=CMath::sqrt(CMath::max(1e-16,eigenvalues[i]));
97  }
98 
99  SG_FREE(eigenvalues);
100 
102  kernel_matrix.matrix = NULL;
105 
106  cblas_dgemv(CblasColMajor, CblasTrans,
107  n, n, 1.0, m_transformation_matrix.matrix, n,
108  bias_tmp, 1, 0.0, m_bias_vector.vector, 1);
109 
111  SGVector<float64_t>::scale_vector(1.0/n, rowsum, n);
112 
113  for (int32_t i=0; i<n; i++)
114  {
115  for (int32_t j=0; j<n; j++)
116  m_transformation_matrix.matrix[j+n*i] -= rowsum[i];
117  }
118  SG_FREE(rowsum);
119  SG_FREE(bias_tmp);
120 
121  m_initialized=true;
122  SG_INFO("Done\n")
123  return true;
124  }
125  return false;
126 }
127 
128 
130 {
132  CDenseFeatures<float64_t>* simple_features = (CDenseFeatures<float64_t>*)features;
133 
134  int32_t num_vectors = simple_features->get_num_vectors();
135  int32_t i,j,k;
136  int32_t n = m_transformation_matrix.num_cols;
137 
138  m_kernel->init(features,m_init_features);
139 
140  float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
141 
142  for (i=0; i<num_vectors; i++)
143  {
144  for (j=0; j<m_target_dim; j++)
145  new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
146 
147  for (j=0; j<n; j++)
148  {
149  float64_t kij = m_kernel->kernel(i,j);
150 
151  for (k=0; k<m_target_dim; k++)
152  new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
153  }
154  }
155 
156  m_kernel->cleanup();
157  simple_features->set_feature_matrix(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
158  return ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
159 }
160 
162 {
167 
168  int32_t j,k;
169  int32_t n = m_transformation_matrix.num_cols;
170 
171  for (j=0; j<m_target_dim; j++)
172  result.vector[j] = m_bias_vector.vector[j];
173 
174  for (j=0; j<n; j++)
175  {
176  float64_t kj = m_kernel->kernel(0,j);
177 
178  for (k=0; k<m_target_dim; k++)
179  result.vector[k] += kj*m_transformation_matrix.matrix[(n-k-1)*n+j];
180  }
181 
182  m_kernel->cleanup();
183  return result;
184 }
185 
187 {
189 
190  int32_t num_vectors = features->get_num_vectors();
191  int32_t i,j,k;
192  int32_t n = m_transformation_matrix.num_cols;
193 
194  m_kernel->init(features,m_init_features);
195 
196  float64_t* new_feature_matrix = SG_MALLOC(float64_t, m_target_dim*num_vectors);
197 
198  for (i=0; i<num_vectors; i++)
199  {
200  for (j=0; j<m_target_dim; j++)
201  new_feature_matrix[i*m_target_dim+j] = m_bias_vector.vector[j];
202 
203  for (j=0; j<n; j++)
204  {
205  float64_t kij = m_kernel->kernel(i,j);
206 
207  for (k=0; k<m_target_dim; k++)
208  new_feature_matrix[k+i*m_target_dim] += kij*m_transformation_matrix.matrix[(n-k-1)*n+j];
209  }
210  }
211 
212  m_kernel->cleanup();
213 
214  return new CDenseFeatures<float64_t>(SGMatrix<float64_t>(new_feature_matrix,m_target_dim,num_vectors));
215 }
216 
217 #endif
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
the class DimensionReductionPreprocessor, a base class for preprocessors used to lower the dimensiona...
static void center_matrix(T *matrix, int32_t m, int32_t n)
Definition: SGMatrix.cpp:348
#define SG_INFO(...)
Definition: SGIO.h:118
virtual void cleanup()
Definition: Kernel.cpp:173
SGVector< float64_t > m_bias_vector
Definition: KernelPCA.h:104
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:221
virtual SGVector< float64_t > apply_to_feature_vector(SGVector< float64_t > vector)
Definition: KernelPCA.cpp:161
virtual SGMatrix< float64_t > apply_to_feature_matrix(CFeatures *features)
Definition: KernelPCA.cpp:129
static T * get_column_sum(T *matrix, int32_t m, int32_t n)
Definition: SGMatrix.cpp:329
void set_feature_matrix(SGMatrix< ST > matrix)
virtual int32_t get_num_vectors() const =0
index_t num_cols
Definition: SGMatrix.h:376
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:207
SGMatrix< float64_t > get_kernel_matrix()
Definition: Kernel.h:220
#define SG_REF(x)
Definition: SGObject.h:54
index_t num_rows
Definition: SGMatrix.h:374
virtual void cleanup()
cleanup
Definition: KernelPCA.cpp:52
static void scale_vector(T alpha, T *vec, int32_t len)
Scale vector inplace.
Definition: SGVector.cpp:820
virtual CDenseFeatures< float64_t > * apply_to_string_features(CFeatures *features)
Definition: KernelPCA.cpp:186
virtual ~CKernelPCA()
Definition: KernelPCA.cpp:63
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
virtual int32_t get_num_vectors() const
static void add_scalar(T alpha, T *vec, int32_t len)
Add scalar to vector inplace.
Definition: SGVector.h:344
double float64_t
Definition: common.h:50
CFeatures * m_init_features
Definition: KernelPCA.h:98
SGMatrix< float64_t > m_transformation_matrix
Definition: KernelPCA.h:101
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:354
static T max(T a, T b)
Definition: Math.h:168
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
static T * get_row_sum(T *matrix, int32_t m, int32_t n)
Definition: SGMatrix.cpp:316
The Kernel base class.
Definition: Kernel.h:159
#define SG_ADD(...)
Definition: SGObject.h:84
static float32_t sqrt(float32_t x)
Definition: Math.h:459
static SGVector< float64_t > compute_eigenvectors(SGMatrix< float64_t > matrix)
Definition: SGMatrix.cpp:891

SHOGUN Machine Learning Toolbox - Documentation