FisherLDA.cpp
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Abhijeet Kislay
 */
#include <shogun/lib/config.h>

#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <vector>

using namespace std;
using namespace Eigen;
using namespace shogun;

CFisherLDA::CFisherLDA(EFLDAMethod method, float64_t thresh):
    CDimensionReductionPreprocessor()
{
    initialize_parameters();
    m_method=method;
    m_threshold=thresh;
}

void CFisherLDA::initialize_parameters()
{
    m_method=AUTO_FLDA;
    m_threshold=0.01;
    m_num_dim=0;
    SG_ADD(&m_method, "FLDA_method", "method for performing FLDA",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_num_dim, "final_dimensions", "dimensions to be retained",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_transformation_matrix, "transformation_matrix", "Transformation"
        " matrix (Eigenvectors of covariance matrix).", MS_NOT_AVAILABLE);
    SG_ADD(&m_mean_vector, "mean_vector", "Mean Vector.", MS_NOT_AVAILABLE);
    SG_ADD(&m_eigenvalues_vector, "eigenvalues_vector",
        "Vector with Eigenvalues.", MS_NOT_AVAILABLE);
}

CFisherLDA::~CFisherLDA()
{
}

bool CFisherLDA::fit(CFeatures *features, CLabels *labels, int32_t num_dimensions)
{
    REQUIRE(features, "Features are not provided!\n")

    REQUIRE(features->get_feature_class()==C_DENSE,
        "LDA only works with dense features. You provided %s\n",
        features->get_name());

    REQUIRE(features->get_feature_type()==F_DREAL,
        "LDA only works with real features.\n");

    REQUIRE(labels, "Labels for the given features are not specified!\n")

    REQUIRE(labels->get_label_type()==LT_MULTICLASS, "The labels should be of "
        "the type MulticlassLabels! You provided %s\n", labels->get_name());

    SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)
        ->get_feature_matrix();

    SGVector<float64_t> labels_vector=((CMulticlassLabels*)labels)->get_labels();

    int32_t num_vectors=feature_matrix.num_cols;
    int32_t num_features=feature_matrix.num_rows;

    REQUIRE(labels_vector.vlen==num_vectors, "The number of samples provided (%d)"
        " must be equal to the number of labels provided (%d)\n", num_vectors,
        labels_vector.vlen);

    // C holds the number of unique classes.
    int32_t C=((CMulticlassLabels*)labels)->get_num_classes();

    REQUIRE(C>1, "At least two classes are needed to perform LDA.\n")

    int32_t i=0;
    int32_t j=0;

    m_num_dim=num_dimensions;

    // Clip the number of target dimensions to a valid range:
    // at most C-1 dimensions can be retained.
    if ((m_num_dim<=0) || (m_num_dim>(C-1)))
        m_num_dim=(C-1);

    MatrixXd fmatrix=Map<MatrixXd>(feature_matrix.matrix, num_features,
        num_vectors);
    Map<VectorXd> lvector(labels_vector.vector, num_vectors);

    // holds the total mean
    m_mean_vector=SGVector<float64_t>(num_features);
    Map<VectorXd> mean_total(m_mean_vector.vector, num_features);
    mean_total=VectorXd::Zero(num_features);
    // holds the mean for each class
    vector<VectorXd> mean_class(C);

    // holds the frequency of each class, i.e. the i'th element holds
    // the number of samples observed for class i.
    VectorXd num_class=VectorXd::Zero(C);

    // calculate the class means and the total mean.
    for (i=0; i<C; i++)
    {
        mean_class[i]=VectorXd::Zero(num_features);
        for (j=0; j<num_vectors; j++)
        {
            if (i==lvector[j])
            {
                num_class[i]++;
                mean_class[i]+=fmatrix.col(j);
            }
        }
        mean_class[i]/=(float64_t)num_class[i];
        mean_total+=mean_class[i];
    }
    mean_total/=(float64_t)C;
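    // NOTE: mean_total is the unweighted average of the class means, which
    // equals the global sample mean only when all classes contain the same
    // number of samples.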

    // Subtract the class means from the respective data, e.g. all data
    // belonging to class 0 has the mean of class 0 subtracted from it.
    for (i=0; i<C; i++)
        for (j=0; j<num_vectors; j++)
            if (i==lvector[j])
                fmatrix.col(j)-=mean_class[i];

    if ((m_method==CANVAR_FLDA) ||
        (m_method==AUTO_FLDA && num_vectors<num_features))
    {
        // holds the centered data for each class
        vector<MatrixXd> centered_class_i(C);
        VectorXd temp=num_class;
        MatrixXd Sw=MatrixXd::Zero(num_features, num_features);
        for (i=0; i<C; i++)
        {
            centered_class_i[i]=MatrixXd::Zero(num_features, num_class[i]);
            for (j=0; j<num_vectors; j++)
                if (i==lvector[j])
                    centered_class_i[i].col(num_class[i]-(temp[i]--))
                        =fmatrix.col(j);
            Sw+=(centered_class_i[i]*centered_class_i[i].transpose())
                *num_class[i]/(float64_t)(num_class[i]-1);
        }

        // between-class matrix for the canonical variates implementation
        MatrixXd Sb(num_features, C);
        for (i=0; i<C; i++)
            Sb.col(i)=sqrt(num_class[i])*(mean_class[i]-mean_total);

        MatrixXd fmatrix1=Map<MatrixXd>(feature_matrix.matrix, num_features,
            num_vectors);

        JacobiSVD<MatrixXd> svd(fmatrix1, ComputeThinU | ComputeThinV);
        // basis to represent the solution
        MatrixXd Q;

        if (num_features>num_vectors)
        {
            // keep only the leading singular vectors above the threshold
            j=0;
            for (i=0; i<num_vectors; i++)
                if (svd.singularValues()(i)>m_threshold)
                    j++;
                else
                    break;
            Q=svd.matrixU().leftCols(j);
        }
        else
            Q=svd.matrixU();

        // Sb is the modified between-class scatter
        Sb=(Q.transpose())*Sb*(Sb.transpose())*Q;
        // Sw is the modified within-class scatter
        Sw=Q.transpose()*Sw*Q;

        // to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw)))):
        // 1. get Cw=Chol(Sw) and take the QR decomposition of Cw'
        HouseholderQR<MatrixXd> decomposition(Sw.llt().matrixU().transpose());
        // 2. get P=inv(Cw')*Sb
        //    MatrixXd P=decomposition.solve(Sb);
        // 3. the final matrix handed to the SVD is therefore:
        //    final_output=(inv(Cw')*(P'))';
        //    MatrixXd X_final_chol=(decomposition.solve(P.transpose())).transpose();
        JacobiSVD<MatrixXd> svd2(decomposition.solve
            (decomposition.solve(Sb).transpose()).transpose(), ComputeThinU);
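        // With Sw=Cw'*Cw, svd2 decomposes the symmetric matrix
        // inv(Cw')*Sb*inv(Cw); its singular values equal the eigenvalues of
        // inv(Sw)*Sb in the reduced basis Q, i.e. the same quantity the
        // classic branch below obtains from EigenSolver directly.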

        m_transformation_matrix=SGMatrix<float64_t>(num_features, m_num_dim);
        Map<MatrixXd> eigenVectors(m_transformation_matrix.matrix, num_features,
            m_num_dim);

        eigenVectors=Q*(svd2.matrixU()).leftCols(m_num_dim);

        m_eigenvalues_vector=SGVector<float64_t>(m_num_dim);
        Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, m_num_dim);
        eigenValues=svd2.singularValues().topRows(m_num_dim);
    }
    else
    {
        // For holding the within-class scatter.
        MatrixXd Sw=fmatrix*fmatrix.transpose();

        // For holding the between-class scatter.
        MatrixXd Sb(num_features, C);

        for (i=0; i<C; i++)
            Sb.col(i)=mean_class[i];

        Sb=Sb-mean_total.rowwise().replicate(C);
        Sb=Sb*Sb.transpose();

        // Solve the linear system A*x=b with A=Sw, b=Sb and x=M,
        // i.e. M=inv(Sw)*Sb, then compute the eigenvalues and
        // eigenvectors of M.
        EigenSolver<MatrixXd> es(Sw.householderQr().solve(Sb));
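        // The eigenvectors of inv(Sw)*Sb are the directions that maximize
        // the Fisher criterion (w'*Sb*w)/(w'*Sw*w); the largest eigenvalues
        // correspond to the most discriminative directions.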

        MatrixXd all_eigenvectors=es.eigenvectors().real();
        VectorXd all_eigenvalues=es.eigenvalues().real();

        std::vector<pair<float64_t, int32_t> > data(num_features);
        for (i=0; i<num_features; i++)
        {
            data[i].first=all_eigenvalues[i];
            data[i].second=i;
        }
        // sort the eigenvalues in ascending order.
        std::sort(data.begin(), data.end());

        // keep the top 'm_num_dim' eigenvalues
        m_eigenvalues_vector=SGVector<float64_t>(m_num_dim);
        Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, m_num_dim);

        // keep the 'm_num_dim' eigenvectors corresponding to those eigenvalues
        m_transformation_matrix=SGMatrix<float64_t>(num_features, m_num_dim);
        Map<MatrixXd> eigenVectors(m_transformation_matrix.matrix, num_features,
            m_num_dim);

        for (i=0; i<m_num_dim; i++)
        {
            eigenValues[i]=data[num_features-i-1].first;
            eigenVectors.col(i)=all_eigenvectors.col(data[num_features-i-1].second);
        }
    }
    return true;
}

void CFisherLDA::cleanup()
{
    // reset the state learned by fit()
    m_transformation_matrix=SGMatrix<float64_t>();
    m_mean_vector=SGVector<float64_t>();
    m_eigenvalues_vector=SGVector<float64_t>();
}

SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures *features)
{
    REQUIRE(features->get_feature_class()==C_DENSE,
        "LDA only works with dense features\n");

    REQUIRE(features->get_feature_type()==F_DREAL,
        "LDA only works with real features\n");

    SGMatrix<float64_t> m=((CDenseFeatures<float64_t>*)
        features)->get_feature_matrix();

    int32_t num_vectors=m.num_cols;
    int32_t num_features=m.num_rows;

    SG_INFO("Transforming feature matrix\n")
    Map<MatrixXd> transform_matrix(m_transformation_matrix.matrix,
        m_transformation_matrix.num_rows, m_transformation_matrix.num_cols);

    SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)

    Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);

    feature_matrix.block(0, 0, m_num_dim, num_vectors)=
        transform_matrix.transpose()*feature_matrix;
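    // The projected data now occupies the top m_num_dim rows of the same
    // buffer; below it is compacted column by column and the matrix header
    // is shrunk to m_num_dim x num_vectors.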

    SG_INFO("Form matrix of target dimension\n")
    for (int32_t col=0; col<num_vectors; col++)
    {
        for (int32_t row=0; row<m_num_dim; row++)
            m[col*m_num_dim+row]=feature_matrix(row, col);
    }
    m.num_rows=m_num_dim;
    m.num_cols=num_vectors;
    ((CDenseFeatures<float64_t>*)features)->set_feature_matrix(m);
    return m;
}

SGVector<float64_t> CFisherLDA::apply_to_feature_vector(SGVector<float64_t> vector)
{
    SGVector<float64_t> result=SGVector<float64_t>(m_num_dim);
    Map<VectorXd> resultVec(result.vector, m_num_dim);
    Map<VectorXd> inputVec(vector.vector, vector.vlen);

    Map<MatrixXd> transformMat(m_transformation_matrix.matrix,
        m_transformation_matrix.num_rows, m_transformation_matrix.num_cols);

    resultVec=transformMat.transpose()*inputVec;
    return result;
}

SGMatrix<float64_t> CFisherLDA::get_transformation_matrix()
{
    return m_transformation_matrix;
}

SGVector<float64_t> CFisherLDA::get_eigenvalues()
{
    return m_eigenvalues_vector;
}

SGVector<float64_t> CFisherLDA::get_mean()
{
    return m_mean_vector;
}
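
Example usage. The following is a minimal sketch, not part of the generated listing, showing how this preprocessor is typically driven from C++. It assumes the standard Shogun 4.x entry points init_shogun_with_defaults()/exit_shogun() from shogun/base/init.h, CDenseFeatures, CMulticlassLabels, and SG_UNREF, together with hand-made toy data.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/preprocessor/FisherLDA.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: 2-dimensional samples, 6 vectors, 2 classes (column-major)
    SGMatrix<float64_t> data(2, 6);
    float64_t values[]={1.0, 2.0,  2.0, 1.0,  1.5, 2.5,
                        5.0, 6.0,  6.0, 5.0,  5.5, 6.5};
    for (int32_t i=0; i<12; i++)
        data.matrix[i]=values[i];

    SGVector<float64_t> lab(6);
    for (int32_t i=0; i<6; i++)
        lab[i]=(i<3) ? 0 : 1;

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
    CMulticlassLabels* labels=new CMulticlassLabels(lab);

    // let the preprocessor pick the solver; keep at most C-1=1 dimension
    CFisherLDA* lda=new CFisherLDA(AUTO_FLDA, 0.01);
    lda->fit(features, labels, 1);

    // features are projected in place; 'projected' is 1 x 6 afterwards
    SGMatrix<float64_t> projected=lda->apply_to_feature_matrix(features);

    SG_UNREF(lda);
    SG_UNREF(features);
    SG_UNREF(labels);
    exit_shogun();
    return 0;
}

After fit(), the learned projection and its eigenvalues can also be queried with get_transformation_matrix() and get_eigenvalues().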