SHOGUN  6.1.3
FisherLDA.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Shogun Toolbox Foundation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7 
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18 
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Written (W) 2014 Abhijeet Kislay
32  */
33 #include <shogun/lib/config.h>
34 
37 #include <shogun/io/SGIO.h>
39 #include <shogun/lib/common.h>
47 
48 using namespace std;
49 using namespace Eigen;
50 using namespace shogun;
51 
52 CFisherLDA::CFisherLDA(
53  EFLDAMethod method, float64_t thresh, float64_t gamma, bool bdc_svd)
55 {
56  initialize_parameters();
57  m_method=method;
58  m_threshold=thresh;
59  m_gamma = gamma;
60  m_bdc_svd = bdc_svd;
61 }
62 
63 void CFisherLDA::initialize_parameters()
64 {
66  m_threshold=0.01;
67  m_num_dim=0;
68  m_gamma = 0;
69  m_bdc_svd = true;
70  SG_ADD(
71  &m_method, "FLDA_method", "method for performing FLDA",
73  SG_ADD(
74  &m_num_dim, "final_dimensions", "dimensions to be retained",
76  SG_ADD(&m_gamma, "m_gamma", "Regularization parameter", MS_NOT_AVAILABLE);
77  SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
78  SG_ADD(
79  &m_transformation_matrix, "transformation_matrix",
80  "Transformation"
81  " matrix (Eigenvectors of covariance matrix).",
83  SG_ADD(&m_mean_vector, "mean_vector", "Mean Vector.", MS_NOT_AVAILABLE);
84  SG_ADD(
85  &m_eigenvalues_vector, "eigenvalues_vector", "Vector with Eigenvalues.",
87 }
88 
90 {
91 }
92 
93 bool CFisherLDA::fit(CFeatures *features, CLabels *labels, int32_t num_dimensions)
94 {
95  REQUIRE(features, "Features are not provided!\n")
96 
97  REQUIRE(features->get_feature_class()==C_DENSE,
98  "LDA only works with dense features. you provided %s\n",
99  features->get_name());
100 
101  REQUIRE(features->get_feature_type()==F_DREAL,
102  "LDA only works with real features.\n");
103 
104  REQUIRE(labels, "Labels for the given features are not specified!\n")
105 
106  REQUIRE(
107  labels->get_label_type() == LT_MULTICLASS,
108  "The labels should be of "
109  "the type MulticlassLabels! you provided %s\n",
110  labels->get_name());
111 
112  CDenseFeatures<float64_t>* dense_features =
113  static_cast<CDenseFeatures<float64_t>*>(features);
114  CMulticlassLabels* multiclass_labels =
115  static_cast<CMulticlassLabels*>(labels);
116 
117  index_t num_vectors = dense_features->get_num_vectors();
118  index_t num_features = dense_features->get_num_features();
119 
120  REQUIRE(
121  labels->get_num_labels() == num_vectors,
122  "The number of samples provided (%d)"
123  " must be equal to the number of labels provided(%d)\n",
124  num_vectors, labels->get_num_labels());
125 
126  int32_t num_class = multiclass_labels->get_num_classes();
127 
128  REQUIRE(num_class > 1, "At least two classes are needed to perform LDA.\n")
129 
130  m_num_dim=num_dimensions;
131 
132  // clip number if Dimensions to be a valid number
133  if ((m_num_dim <= 0) || (m_num_dim > (num_class - 1)))
134  m_num_dim = (num_class - 1);
135 
136  bool lda_more_efficient =
137  m_method == AUTO_FLDA && num_vectors < num_features;
138 
139  if ((m_method == CANVAR_FLDA) || lda_more_efficient)
140  return solver_canvar(dense_features, multiclass_labels);
141  else
142  return solver_classic(dense_features, multiclass_labels);
143 }
144 
146  CDenseFeatures<float64_t>* features, CMulticlassLabels* labels)
147 {
148  auto solver = std::unique_ptr<LDACanVarSolver<float64_t>>(
150  features, labels, m_num_dim, m_gamma, m_bdc_svd, m_threshold));
151 
152  m_transformation_matrix = solver->get_eigenvectors();
153  m_eigenvalues_vector = solver->get_eigenvalues();
154 
155  return true;
156 }
157 
159  CDenseFeatures<float64_t>* features, CMulticlassLabels* labels)
160 {
161  SGMatrix<float64_t> data = features->get_feature_matrix();
162  index_t num_features = data.num_rows;
163  int32_t num_class = labels->get_num_classes();
164 
165  auto solver = std::unique_ptr<LDASolver<float64_t>>(
166  new LDASolver<float64_t>(features, labels, m_gamma));
167 
168  m_mean_vector = solver->get_mean();
169  auto class_mean = solver->get_class_mean();
170  auto class_count = solver->get_class_count();
171  SGMatrix<float64_t> Sw = solver->get_within_cov();
172 
173  // For holding the between class scatter.
174  SGMatrix<float64_t> Sb(num_features, num_class);
175 
176  for (index_t i = 0; i < num_class; i++)
177  Sb.set_column(i, linalg::add(class_mean[i], m_mean_vector, 1.0, -1.0));
178  Sb = linalg::matrix_prod(Sb, Sb, false, true);
179 
180  // solve Sw * M = Sb
181  auto aux = linalg::qr_solver(Sw, Sb);
182 
183  // calculate the eigenvalues and eigenvectors of M.
184  SGVector<float64_t> eigenvalues(Sb.num_rows);
185  SGMatrix<float64_t> eigenvectors(Sb.num_rows, Sb.num_cols);
186  linalg::eigen_solver(aux, eigenvalues, eigenvectors);
187 
188  // keep 'm_num_dim' numbers of top Eigenvalues
190 
191  // keep 'm_num_dim' numbers of EigenVectors
192  // corresponding to their respective eigenvalues
194 
195  auto args = CMath::argsort(eigenvalues);
196  for (index_t i = 0; i < m_num_dim; i++)
197  {
198  index_t k = args[num_features - i - 1];
199  m_eigenvalues_vector[i] = eigenvalues[k];
200  m_transformation_matrix.set_column(k, eigenvectors.get_column(i));
201  }
202 
203  return true;
204 }
205 
207 {
211 }
212 
214 {
215  REQUIRE(features->get_feature_class()==C_DENSE,
216  "LDA only works with dense features\n");
217 
218  REQUIRE(features->get_feature_type()==F_DREAL,
219  "LDA only works with real features\n");
220 
222  ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
223 
224  int32_t num_vectors=m.num_cols;
225  int32_t num_features=m.num_rows;
226 
227  SG_INFO("Transforming feature matrix\n")
228  Map<MatrixXd> transform_matrix(
231 
232  SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)
233 
234  Map<MatrixXd> feature_matrix (m.matrix, num_features, num_vectors);
235 
236  feature_matrix.block(0, 0, m_num_dim, num_vectors) =
237  transform_matrix.transpose() * feature_matrix;
238 
239  SG_INFO("Form matrix of target dimension")
240  for (int32_t col=0; col<num_vectors; col++)
241  {
242  for (int32_t row=0; row<m_num_dim; row++)
243  m[col*m_num_dim+row]=feature_matrix(row, col);
244  }
245  m.num_rows=m_num_dim;
246  m.num_cols=num_vectors;
247  ((CDenseFeatures<float64_t>*)features)->set_feature_matrix(m);
248  return m;
249 }
250 
252 {
254  Map<VectorXd> resultVec(result.vector, m_num_dim);
255  Map<VectorXd> inputVec(vector.vector, vector.vlen);
256 
258  Map<MatrixXd> transformMat(
261 
262  resultVec=transformMat.transpose()*inputVec;
263  return result;
264 }
265 
267 {
269 }
270 
272 {
273  return m_eigenvalues_vector;
274 }
275 
277 {
278  return m_mean_vector;
279 }
virtual const char * get_name() const =0
the class DimensionReductionPreprocessor, a base class for preprocessors used to lower the dimensiona...
#define SG_INFO(...)
Definition: SGIO.h:117
SGMatrix< ST > get_feature_matrix()
virtual ELabelType get_label_type() const =0
SGVector< T > get_column(index_t col) const
Definition: SGMatrix.cpp:399
virtual SGMatrix< float64_t > apply_to_feature_matrix(CFeatures *features)
Definition: FisherLDA.cpp:213
SGMatrix< float64_t > get_transformation_matrix()
Definition: FisherLDA.cpp:266
virtual bool fit(CFeatures *features, CLabels *labels, int32_t num_dimensions=0)
Definition: FisherLDA.cpp:93
void eigen_solver(const SGMatrix< T > &A, SGVector< T > &eigenvalues, SGMatrix< T > &eigenvectors)
int32_t index_t
Definition: common.h:72
virtual SGVector< float64_t > apply_to_feature_vector(SGVector< float64_t > vector)
Definition: FisherLDA.cpp:251
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
multi-class labels 0,1,...
Definition: LabelTypes.h:20
Container< T > qr_solver(const SGMatrix< T > &A, const Container< T > &b)
float64_t m_gamma
Definition: FisherLDA.h:184
Definition: SGMatrix.h:25
Definition: basetag.h:132
void add(SGVector< T > &a, SGVector< T > &b, SGVector< T > &result, T alpha=1, T beta=1)
#define REQUIRE(x,...)
Definition: SGIO.h:181
virtual ~CFisherLDA()
Definition: FisherLDA.cpp:89
std::enable_if<!std::is_same< T, complex128_t >::value, float64_t >::type mean(const Container< T > &a)
static SGVector< index_t > argsort(SGVector< T > vector)
Definition: Math.h:1418
Multiclass Labels for multi-class classification.
double float64_t
Definition: common.h:60
index_t num_rows
Definition: SGMatrix.h:495
virtual EFeatureClass get_feature_class() const =0
index_t num_cols
Definition: SGMatrix.h:497
bool solver_classic(CDenseFeatures< float64_t > *features, CMulticlassLabels *labels)
Definition: FisherLDA.cpp:158
SGMatrix< float64_t > m_transformation_matrix
Definition: FisherLDA.h:180
void matrix_prod(SGMatrix< T > &A, SGVector< T > &b, SGVector< T > &result, bool transpose=false)
SGVector< float64_t > m_mean_vector
Definition: FisherLDA.h:192
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:69
SGVector< float64_t > m_eigenvalues_vector
Definition: FisherLDA.h:194
EFLDAMethod
Definition: FisherLDA.h:49
float64_t m_threshold
Definition: FisherLDA.h:186
#define SG_ADD(...)
Definition: SGObject.h:93
void set_column(index_t col, const SGVector< T > vec)
Definition: SGMatrix.cpp:406
SGVector< float64_t > get_mean()
Definition: FisherLDA.cpp:276
virtual EFeatureType get_feature_type() const =0
index_t vlen
Definition: SGVector.h:571
bool solver_canvar(CDenseFeatures< float64_t > *features, CMulticlassLabels *labels)
Definition: FisherLDA.cpp:145
virtual void cleanup()
Definition: FisherLDA.cpp:206
SGVector< float64_t > get_eigenvalues()
Definition: FisherLDA.cpp:271

SHOGUN Machine Learning Toolbox - Documentation