SHOGUN 4.1.0
All | Namespaces | Files | Functions | Variables | Typedefs | Enumerations | Enumerator | Friends | Macros
FisherLDA.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Shogun Toolbox Foundation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7 
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18 
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Written (W) 2014 Abhijeet Kislay
32  */
#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/Features.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <vector>

using namespace std;
using namespace Eigen;
using namespace shogun;
51 
52 CFisherLDA::CFisherLDA (EFLDAMethod method, float64_t thresh):
54 {
55  initialize_parameters();
56  m_method=method;
57  m_threshold=thresh;
58 }
59 
60 void CFisherLDA::initialize_parameters()
61 {
63  m_threshold=0.01;
64  m_num_dim=0;
65  SG_ADD(&m_method, "FLDA_method","method for performing FLDA",
67  SG_ADD(&m_num_dim, "final_dimensions","dimensions to be retained",
69  SG_ADD(&m_transformation_matrix, "transformation_matrix","Transformation"
70  " matrix (Eigenvectors of covariance matrix).", MS_NOT_AVAILABLE);
71  SG_ADD(&m_mean_vector, "mean_vector", "Mean Vector.", MS_NOT_AVAILABLE);
72  SG_ADD(&m_eigenvalues_vector, "eigenvalues_vector",
73  "Vector with Eigenvalues.", MS_NOT_AVAILABLE);
74 }
75 
77 {
78 }
79 
80 bool CFisherLDA::fit(CFeatures *features, CLabels *labels, int32_t num_dimensions)
81 {
82  REQUIRE(features, "Features are not provided!\n")
83 
84  REQUIRE(features->get_feature_class()==C_DENSE,
85  "LDA only works with dense features. you provided %s\n",
86  features->get_name());
87 
88  REQUIRE(features->get_feature_type()==F_DREAL,
89  "LDA only works with real features.\n");
90 
91  REQUIRE(labels, "Labels for the given features are not specified!\n")
92 
93  REQUIRE(labels->get_label_type()==LT_MULTICLASS, "The labels should be of "
94  "the type MulticlassLabels! you provided %s\n", labels->get_name());
95 
96  SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)
97  ->get_feature_matrix();
98 
99  SGVector<float64_t> labels_vector=((CMulticlassLabels*)labels)->get_labels();
100 
101  int32_t num_vectors=feature_matrix.num_cols;
102  int32_t num_features=feature_matrix.num_rows;
103 
104  REQUIRE(labels_vector.vlen==num_vectors,"The number of samples provided (%d)"
105  " must be equal to the number of labels provided(%d)\n",num_vectors,
106  labels_vector.vlen);
107 
108  // C holds the number of unique classes.
109  int32_t C=((CMulticlassLabels*)labels)->get_num_classes();
110 
111  REQUIRE(C>1, "At least two classes are needed to perform LDA.\n")
112 
113  int32_t i=0;
114  int32_t j=0;
115 
116  m_num_dim=num_dimensions;
117  // max target dimension allowed.
118  // int32_t max_dim_allowed=C-1;
119 
120  // clip number if Dimensions to be a valid number
121  if ((m_num_dim<=0) || (m_num_dim>(C-1)))
122  m_num_dim=(C-1);
123 
124  MatrixXd fmatrix=Map<MatrixXd>(feature_matrix.matrix, num_features,
125  num_vectors);
126  Map<VectorXd> lvector(labels_vector.vector, num_vectors);
127 
128  // holds the total mean
129  m_mean_vector=SGVector<float64_t>(num_features);
130  Map<VectorXd>mean_total (m_mean_vector.vector, num_features);
131  mean_total=VectorXd::Zero(num_features);
132  // holds the mean for each class
133  vector<VectorXd> mean_class(C);
134 
135  // holds the frequency for each class.
136  // i.e the i'th element holds the number
137  // of times class i is observed.
138  VectorXd num_class=VectorXd::Zero(C);
139 
140  // calculate the class means and the total means.
141  for (i=0; i<C; i++)
142  {
143  mean_class[i]=VectorXd::Zero(num_features);
144  for (j=0; j<num_vectors; j++)
145  {
146  if (i==lvector[j])
147  {
148  num_class[i]++;
149  mean_class[i]+=fmatrix.col(j);
150  }
151  }
152  mean_class[i]/=(float64_t)num_class[i];
153  mean_total+=mean_class[i];
154  }
155  mean_total/=(float64_t)C;
156 
157  // Subtract the class means from the 'respective' data.
158  // e.g all data belonging to class 0 is subtracted by
159  // the mean of class 0 data.
160  for (i=0; i<C; i++)
161  for (j=0; j<num_vectors; j++)
162  if (i==lvector[j])
163  fmatrix.col(j)-=mean_class[i];
164 
165  if ((m_method==CANVAR_FLDA) ||
166  (m_method==AUTO_FLDA && num_vectors<num_features))
167  {
168  // holds the fmatrix for each class
169  vector<MatrixXd> centered_class_i(C);
170  VectorXd temp=num_class;
171  MatrixXd Sw=MatrixXd::Zero(num_features, num_features);
172  for (i=0; i<C; i++)
173  {
174  centered_class_i[i]=MatrixXd::Zero(num_features, num_class[i]);
175  for (j=0; j<num_vectors; j++)
176  if (i==lvector[j])
177  centered_class_i[i].col(num_class[i]-(temp[i]--))
178  =fmatrix.col(j);
179  Sw+=(centered_class_i[i]*centered_class_i[i].transpose())
180  *num_class[i]/(float64_t)(num_class[i]-1);
181  }
182 
183  // within class matrix for cannonical variates implementation
184  MatrixXd Sb(num_features, C);
185  for (i=0; i<C; i++)
186  Sb.col(i)=sqrt(num_class[i])*(mean_class[i]-mean_total);
187 
188  MatrixXd fmatrix1=Map<MatrixXd>(feature_matrix.matrix, num_features,
189  num_vectors);
190 
191  JacobiSVD<MatrixXd> svd(fmatrix1, ComputeThinU | ComputeThinV);
192  // basis to represent the solution
193  MatrixXd Q;
194 
195  if(num_features>num_vectors)
196  {
197  j=0;
198  for (i=0;i<num_vectors;i++)
199  if (svd.singularValues()(i)>m_threshold)
200  j++;
201  else
202  break;
203  Q=svd.matrixU().leftCols(j);
204  }
205  else
206  Q=svd.matrixU();
207 
208  // Sb is the modified between scatter
209  Sb=(Q.transpose())*Sb*(Sb.transpose())*Q;
210  // Sw is the modified within scatter
211  Sw=Q.transpose()*Sw*Q;
212 
213  // to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw))))
214  //1.get Cw=Chol(Sw)
215  //find the decomposition of Cw'
216  HouseholderQR<MatrixXd> decomposition(Sw.llt().matrixU().transpose());
217  //2.get P=inv(Cw')*Sb
218  //MatrixXd P=decomposition.solve(Sb);
219  //3. final value to be put in SVD will be therefore:
220  // final_ output = (inv(Cw')*(P'))';
221  //MatrixXd X_final_chol=(decomposition.solve(P.transpose())).transpose();
222  JacobiSVD<MatrixXd> svd2(decomposition.solve
223  (decomposition.solve(Sb).transpose()).transpose(),ComputeThinU);
225  Map<MatrixXd> eigenVectors(m_transformation_matrix.matrix, num_features,
226  m_num_dim);
227 
228  eigenVectors=Q*(svd2.matrixU()).leftCols(m_num_dim);
229 
232  eigenValues=svd2.singularValues().topRows(m_num_dim);
233  }
234  else
235  {
236  // For holding the within class scatter.
237  MatrixXd Sw=fmatrix*fmatrix.transpose();
238 
239  // For holding the between class scatter.
240  MatrixXd Sb(num_features, C);
241 
242  for (i=0; i<C; i++)
243  Sb.col(i)=mean_class[i];
244 
245  Sb=Sb-mean_total.rowwise().replicate(C);
246  Sb=Sb*Sb.transpose();
247 
248  // calculate the Ax=b problem
249  // where A=Sw
250  // b=Sb
251  // x=M
252  // MatrixXd M=Sw.householderQr().solve(Sb);
253  // calculate the eigenvalues and eigenvectors of M.
254  EigenSolver<MatrixXd> es(Sw.householderQr().solve(Sb));
255 
256  MatrixXd all_eigenvectors=es.eigenvectors().real();
257  VectorXd all_eigenvalues=es.eigenvalues().real();
258 
259  std::vector<pair<float64_t, int32_t> > data(num_features);
260  for (i=0; i<num_features; i++)
261  {
262  data[i].first=all_eigenvalues[i];
263  data[i].second=i;
264  }
265  // sort the eigenvalues.
266  std::sort (data.begin(), data.end());
267 
268  // keep 'm_num_dim' numbers of top Eigenvalues
270  Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, m_num_dim);
271 
272  // keep 'm_num_dim' numbers of EigenVectors
273  // corresponding to their respective eigenvalues
275  Map<MatrixXd> eigenVectors(m_transformation_matrix.matrix, num_features,
276  m_num_dim);
277 
278  for (i=0; i<m_num_dim; i++)
279  {
280  eigenValues[i]=data[num_features-i-1].first;
281  eigenVectors.col(i)=all_eigenvectors.col(data[num_features-i-1].second);
282  }
283  }
284  return true;
285 }
286 
288 {
292 }
293 
295 {
296  REQUIRE(features->get_feature_class()==C_DENSE,
297  "LDA only works with dense features\n");
298 
299  REQUIRE(features->get_feature_type()==F_DREAL,
300  "LDA only works with real features\n");
301 
303  features)->get_feature_matrix();
304 
305  int32_t num_vectors=m.num_cols;
306  int32_t num_features=m.num_rows;
307 
308  SG_INFO("Transforming feature matrix\n")
311 
312  SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)
313 
314  Map<MatrixXd> feature_matrix (m.matrix, num_features, num_vectors);
315 
316  feature_matrix.block (0, 0, m_num_dim, num_vectors)=
317  transform_matrix.transpose()*feature_matrix;
318 
319  SG_INFO("Form matrix of target dimension")
320  for (int32_t col=0; col<num_vectors; col++)
321  {
322  for (int32_t row=0; row<m_num_dim; row++)
323  m[col*m_num_dim+row]=feature_matrix(row, col);
324  }
325  m.num_rows=m_num_dim;
326  m.num_cols=num_vectors;
327  ((CDenseFeatures<float64_t>*)features)->set_feature_matrix(m);
328  return m;
329 }
330 
332 {
334  Map<VectorXd> resultVec(result.vector, m_num_dim);
335  Map<VectorXd> inputVec(vector.vector, vector.vlen);
336 
340 
341  resultVec=transformMat.transpose()*inputVec;
342  return result;
343 }
344 
346 {
348 }
349 
351 {
352  return m_eigenvalues_vector;
353 }
354 
356 {
357  return m_mean_vector;
358 }
359 #endif//HAVE_EIGEN3
virtual const char * get_name() const =0
the class DimensionReductionPreprocessor, a base class for preprocessors used to lower the dimensiona...
#define SG_INFO(...)
Definition: SGIO.h:118
virtual ELabelType get_label_type() const =0
virtual SGMatrix< float64_t > apply_to_feature_matrix(CFeatures *features)
Definition: FisherLDA.cpp:294
SGMatrix< float64_t > get_transformation_matrix()
Definition: FisherLDA.cpp:345
virtual bool fit(CFeatures *features, CLabels *labels, int32_t num_dimensions=0)
Definition: FisherLDA.cpp:80
virtual SGVector< float64_t > apply_to_feature_vector(SGVector< float64_t > vector)
Definition: FisherLDA.cpp:331
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
multi-class labels 0,1,...
Definition: LabelTypes.h:20
Definition: SGMatrix.h:20
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:378
virtual ~CFisherLDA()
Definition: FisherLDA.cpp:76
index_t num_rows
Definition: SGMatrix.h:376
Multiclass Labels for multi-class classification.
index_t vlen
Definition: SGVector.h:494
double float64_t
Definition: common.h:50
virtual EFeatureClass get_feature_class() const =0
SGMatrix< float64_t > m_transformation_matrix
Definition: FisherLDA.h:157
SGVector< float64_t > m_mean_vector
Definition: FisherLDA.h:165
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > m_eigenvalues_vector
Definition: FisherLDA.h:167
EFLDAMethod
Definition: FisherLDA.h:49
float64_t m_threshold
Definition: FisherLDA.h:161
#define SG_ADD(...)
Definition: SGObject.h:81
SGVector< float64_t > get_mean()
Definition: FisherLDA.cpp:355
virtual EFeatureType get_feature_type() const =0
virtual void cleanup()
Definition: FisherLDA.cpp:287
SGVector< float64_t > get_eigenvalues()
Definition: FisherLDA.cpp:350

SHOGUN Machine Learning Toolbox - Project Documentation