SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PCA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 1999-2008,2011 Soeren Sonnenburg
9  * Written (W) 2014 Parijat Mazumdar
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2011 Berlin Institute of Technology
12  */
#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3
#include <shogun/preprocessor/PCA.h>
#include <shogun/mathematics/Math.h>
#include <string.h>
#include <stdlib.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
25 
26 using namespace shogun;
27 using namespace Eigen;
28 
29 CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, EPCAMemoryMode mem_mode)
31 {
32  init();
33  m_whitening = do_whitening;
34  m_mode = mode;
35  m_thresh = thresh;
36  m_mem_mode = mem_mode;
37  m_method = method;
38 }
39 
40 CPCA::CPCA(EPCAMethod method, bool do_whitening, EPCAMemoryMode mem_mode)
42 {
43  init();
44  m_whitening = do_whitening;
45  m_mem_mode = mem_mode;
46  m_method = method;
47 }
48 
49 void CPCA::init()
50 {
54  num_dim = 0;
55  m_initialized = false;
56  m_whitening = false;
58  m_thresh = 1e-6;
60  m_method = AUTO;
62 
63  SG_ADD(&m_transformation_matrix, "transformation_matrix",
64  "Transformation matrix (Eigenvectors of covariance matrix).",
66  SG_ADD(&m_mean_vector, "mean_vector", "Mean Vector.", MS_NOT_AVAILABLE);
67  SG_ADD(&m_eigenvalues_vector, "eigenvalues_vector",
68  "Vector with Eigenvalues.", MS_NOT_AVAILABLE);
69  SG_ADD(&m_initialized, "initalized", "True when initialized.",
71  SG_ADD(&m_whitening, "whitening", "Whether data shall be whitened.",
72  MS_AVAILABLE);
73  SG_ADD((machine_int_t*) &m_mode, "mode", "PCA Mode.", MS_AVAILABLE);
74  SG_ADD(&m_thresh, "m_thresh", "Cutoff threshold.", MS_AVAILABLE);
75  SG_ADD((machine_int_t*) &m_mem_mode, "m_mem_mode",
76  "Memory mode (in-place or reallocation).", MS_NOT_AVAILABLE);
77  SG_ADD((machine_int_t*) &m_method, "m_method",
78  "Method used for PCA calculation", MS_NOT_AVAILABLE);
79  SG_ADD(&m_eigenvalue_zero_tolerance, "eigenvalue_zero_tolerance", "zero tolerance"
80  " for determining zero eigenvalues during whitening to avoid numerical issues", MS_NOT_AVAILABLE);
81 }
82 
84 {
85 }
86 
87 bool CPCA::init(CFeatures* features)
88 {
89  if (!m_initialized)
90  {
91  REQUIRE(features->get_feature_class()==C_DENSE, "PCA only works with dense features")
92  REQUIRE(features->get_feature_type()==F_DREAL, "PCA only works with real features")
93 
94  SGMatrix<float64_t> feature_matrix = ((CDenseFeatures<float64_t>*)features)
95  ->get_feature_matrix();
96  int32_t num_vectors = feature_matrix.num_cols;
97  int32_t num_features = feature_matrix.num_rows;
98  SG_INFO("num_examples: %ld num_features: %ld \n", num_vectors, num_features)
99 
100  // max target dim allowed
101  int32_t max_dim_allowed = CMath::min(num_vectors, num_features);
102  num_dim=0;
103 
104  REQUIRE(m_target_dim<=max_dim_allowed,
105  "target dimension should be less or equal to than minimum of N and D")
106 
107  // center data
108  Map<MatrixXd> fmatrix(feature_matrix.matrix, num_features, num_vectors);
109  m_mean_vector = SGVector<float64_t>(num_features);
110  Map<VectorXd> data_mean(m_mean_vector.vector, num_features);
111  data_mean = fmatrix.rowwise().sum()/(float64_t) num_vectors;
112  fmatrix = fmatrix.colwise()-data_mean;
113 
114  m_eigenvalues_vector = SGVector<float64_t>(max_dim_allowed);
115  Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, max_dim_allowed);
116 
117  if (m_method == AUTO)
118  m_method = (num_vectors>num_features) ? EVD : SVD;
119 
120  if (m_method == EVD)
121  {
122  // covariance matrix
123  MatrixXd cov_mat(num_features, num_features);
124  cov_mat = fmatrix*fmatrix.transpose();
125  cov_mat /= (num_vectors-1);
126 
127  SG_INFO("Computing Eigenvalues ... ")
128  // eigen value computed
129  SelfAdjointEigenSolver<MatrixXd> eigenSolve =
130  SelfAdjointEigenSolver<MatrixXd>(cov_mat);
131  eigenValues = eigenSolve.eigenvalues().tail(max_dim_allowed);
132 
133  // target dimension
134  switch (m_mode)
135  {
136  case FIXED_NUMBER :
138  break;
139 
140  case VARIANCE_EXPLAINED :
141  {
142  float64_t eig_sum = eigenValues.sum();
143  float64_t com_sum = 0;
144  for (int32_t i=num_features-1; i<-1; i++)
145  {
146  num_dim++;
147  com_sum += m_eigenvalues_vector.vector[i];
148  if (com_sum/eig_sum>=m_thresh)
149  break;
150  }
151  }
152  break;
153 
154  case THRESHOLD :
155  for (int32_t i=num_features-1; i<-1; i++)
156  {
158  num_dim++;
159  else
160  break;
161  }
162  break;
163  };
164  SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim)
165 
167  Map<MatrixXd> transformMatrix(m_transformation_matrix.matrix,
168  num_features, num_dim);
169  num_old_dim = num_features;
170 
171  // eigenvector matrix
172  transformMatrix = eigenSolve.eigenvectors().block(0,
173  num_features-num_dim, num_features,num_dim);
174  if (m_whitening)
175  {
176  for (int32_t i=0; i<num_dim; i++)
177  {
178  if (CMath::fequals_abs<float64_t>(0.0, eigenValues[i+max_dim_allowed-num_dim],
180  {
181  SG_WARNING("Covariance matrix has almost zero Eigenvalue (ie "
182  "Eigenvalue within a tolerance of %E around 0) at "
183  "dimension %d. Consider reducing its dimension.",
184  m_eigenvalue_zero_tolerance, i+max_dim_allowed-num_dim+1)
185 
186  transformMatrix.col(i) = MatrixXd::Zero(num_features,1);
187  continue;
188  }
189 
190  transformMatrix.col(i) /=
191  sqrt(eigenValues[i+max_dim_allowed-num_dim]*(num_vectors-1));
192  }
193  }
194  }
195 
196  else
197  {
198  // compute SVD of data matrix
199  JacobiSVD<MatrixXd> svd(fmatrix.transpose(), ComputeThinU | ComputeThinV);
200 
201  // compute non-negative eigen values from singular values
202  eigenValues = svd.singularValues();
203  eigenValues = eigenValues.cwiseProduct(eigenValues)/(num_vectors-1);
204 
205  // target dimension
206  switch (m_mode)
207  {
208  case FIXED_NUMBER :
210  break;
211 
212  case VARIANCE_EXPLAINED :
213  {
214  float64_t eig_sum = eigenValues.sum();
215  float64_t com_sum = 0;
216  for (int32_t i=0; i<num_features; i++)
217  {
218  num_dim++;
219  com_sum += m_eigenvalues_vector.vector[i];
220  if (com_sum/eig_sum>=m_thresh)
221  break;
222  }
223  }
224  break;
225 
226  case THRESHOLD :
227  for (int32_t i=0; i<num_features; i++)
228  {
230  num_dim++;
231  else
232  break;
233  }
234  break;
235  };
236  SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim)
237 
238  // right singular vectors form eigenvectors
240  Map<MatrixXd> transformMatrix(m_transformation_matrix.matrix, num_features, num_dim);
241  num_old_dim = num_features;
242  transformMatrix = svd.matrixV().block(0, 0, num_features, num_dim);
243  if (m_whitening)
244  {
245  for (int32_t i=0; i<num_dim; i++)
246  {
247  if (CMath::fequals_abs<float64_t>(0.0, eigenValues[i],
249  {
250  SG_WARNING("Covariance matrix has almost zero Eigenvalue (ie "
251  "Eigenvalue within a tolerance of %E around 0) at "
252  "dimension %d. Consider reducing its dimension.",
254 
255  transformMatrix.col(i) = MatrixXd::Zero(num_features,1);
256  continue;
257  }
258 
259  transformMatrix.col(i) /= sqrt(eigenValues[i]*(num_vectors-1));
260  }
261  }
262  }
263 
264  // restore feature matrix
265  fmatrix = fmatrix.colwise()+data_mean;
266  m_initialized = true;
267  return true;
268  }
269 
270  return false;
271 }
272 
274 {
278  m_initialized = false;
279 }
280 
282 {
284  SGMatrix<float64_t> m = ((CDenseFeatures<float64_t>*) features)->get_feature_matrix();
285  int32_t num_vectors = m.num_cols;
286  int32_t num_features = m.num_rows;
287 
288  SG_INFO("Transforming feature matrix\n")
289  Map<MatrixXd> transform_matrix(m_transformation_matrix.matrix,
291 
292  if (m_mem_mode == MEM_IN_PLACE)
293  {
294  if (m.matrix)
295  {
296  SG_INFO("Preprocessing feature matrix\n")
297  Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
298  VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
299  feature_matrix = feature_matrix.colwise()-data_mean;
300 
301  feature_matrix.block(0,0,num_dim,num_vectors) =
302  transform_matrix.transpose()*feature_matrix;
303 
304  SG_INFO("Form matrix of target dimension")
305  for (int32_t col=0; col<num_vectors; col++)
306  {
307  for (int32_t row=0; row<num_dim; row++)
308  m.matrix[col*num_dim+row] = feature_matrix(row,col);
309  }
310  m.num_rows = num_dim;
311  m.num_cols = num_vectors;
312  }
313 
314  ((CDenseFeatures<float64_t>*) features)->set_feature_matrix(m);
315  return m;
316  }
317  else
318  {
319  SGMatrix<float64_t> ret(num_dim, num_vectors);
320  Map<MatrixXd> ret_matrix(ret.matrix, num_dim, num_vectors);
321  if (m.matrix)
322  {
323  SG_INFO("Preprocessing feature matrix\n")
324  Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
325  VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
326  feature_matrix = feature_matrix.colwise()-data_mean;
327 
328  ret_matrix = transform_matrix.transpose()*feature_matrix;
329  }
330  ((CDenseFeatures<float64_t>*) features)->set_feature_matrix(ret);
331  return ret;
332  }
333 }
334 
336 {
338  Map<VectorXd> resultVec(result.vector, num_dim);
339  Map<VectorXd> inputVec(vector.vector, vector.vlen);
340 
341  Map<VectorXd> mean(m_mean_vector.vector, m_mean_vector.vlen);
342  Map<MatrixXd> transformMat(m_transformation_matrix.matrix,
344 
345  inputVec = inputVec-mean;
346  resultVec = transformMat.transpose()*inputVec;
347  inputVec = inputVec+mean;
348 
349  return result;
350 }
351 
353 {
355 }
356 
358 {
359  return m_eigenvalues_vector;
360 }
361 
363 {
364  return m_mean_vector;
365 }
366 
368 {
369  return m_mem_mode;
370 }
371 
373 {
374  m_mem_mode = e;
375 }
376 
377 void CPCA::set_eigenvalue_zero_tolerance(float64_t eigenvalue_zero_tolerance)
378 {
379  m_eigenvalue_zero_tolerance = eigenvalue_zero_tolerance;
380 }
381 
383 {
385 }
386 
387 #endif // HAVE_EIGEN3

SHOGUN Machine Learning Toolbox - Documentation