SHOGUN  6.1.3
LDA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2014 Abhijeet Kislay
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #include <shogun/classifier/LDA.h>
19 #include <vector>
20 
21 using namespace Eigen;
22 using namespace shogun;
23 
// Constructs an LDA machine from its three settings, without training data.
//   gamma   - stored in m_gamma   (registered by init() as "Regularization parameter")
//   method  - stored in m_method  (registered by init() as "Method used for LDA calculation")
//   bdc_svd - stored in m_bdc_svd (registered by init() as "Use BDC-SVD algorithm")
24 CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd)
25  : CLinearMachine(false)
26 {
   // init() resets m_gamma and m_bdc_svd to their defaults, so it has to run
   // before the caller-supplied values are stored below.
27  init();
28  m_method=method;
29  m_gamma=gamma;
30  m_bdc_svd = bdc_svd;
31 }
32 
// Constructor variant that additionally receives training features and labels.
// NOTE(review): the opening line of this definition (listing line 33,
// presumably `CLDA::CLDA(`) is absent from this extract - confirm against the
// real file.
34  float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab,
35  ELDAMethod method, bool bdc_svd)
   // NOTE(review): the m_gamma(gamma) initializer looks redundant - init()
   // overwrites m_gamma with 0 and the body assigns gamma to it again.
36  : CLinearMachine(false), m_gamma(gamma)
37 {
   // Reset defaults first; everything below overrides them.
38  init();
   // Hand the supplied data to the machine through its setters.
39  set_features(traindat);
40  set_labels(trainlab);
41  m_method=method;
42  m_gamma=gamma;
43  m_bdc_svd = bdc_svd;
44 }
45 
// Sets member defaults (m_gamma = 0, m_bdc_svd = true) and registers
// m_method, m_gamma and m_bdc_svd as parameters through SG_ADD.
// NOTE(review): listing lines 48 and 56 are absent from this extract, so one
// statement right after the opening brace and the tail of the second SG_ADD
// call are not visible here.
46 void CLDA::init()
47 {
49  m_gamma=0;
50  m_bdc_svd = true;
   // m_method is an ELDAMethod; it is registered through a machine_int_t pointer.
51  SG_ADD(
52  (machine_int_t*)&m_method, "m_method",
53  "Method used for LDA calculation", MS_NOT_AVAILABLE);
   // NOTE(review): m_gamma is assigned from float64_t values throughout this
   // file, yet its address is cast to machine_int_t* here. That looks copied
   // from the m_method registration above; passing &m_gamma without the cast
   // is probably what was intended - confirm against the SG_ADD overloads.
54  SG_ADD(
55  (machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter",
57  SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
58 }
59 
// Empty body: nothing is released or reset here.
// NOTE(review): the header line (listing line 60) is absent from this
// extract; presumably this is the CLDA destructor - confirm.
61 {
62 }
63 
// Body of the training entry point. It validates the labels and features and
// then dispatches to train_machine_templated<ST>() for the element type of
// the features. Returns that call's result, or false when the feature type is
// none of F_SHORTREAL / F_DREAL / F_LONGREAL.
// NOTE(review): the header (listing line 64) and listing lines 68, 85, 88 and
// 91 (parts of the REQUIRE calls) are absent from this extract, so the exact
// conditions being checked are not visible here.
65 {
   // Labels must be attached before training.
66  REQUIRE(m_labels, "Labels for the given features are not specified!\n")
   // NOTE(review): the message below reads "should of type" - probably meant
   // "should be of type". It is a runtime string, so it is left untouched.
67  REQUIRE(
69  "The labels should of type CBinaryLabels! Provided type is %s \n",
70  m_labels->get_name())
71 
72  if(data)
73  {
   // Features passed by the caller: reject them unless they advertise
   // FP_DOT, then make them the machine's features.
74  if(!data->has_property(FP_DOT))
75  SG_ERROR("Specified features are not of type CDotFeatures\n")
76  set_features((CDotFeatures*) data);
77  }
78  else
79  {
   // No argument given: fall back to the features already attached.
80  data = get_features();
81  REQUIRE(data, "Features have not been provided.\n")
82  }
83 
   // Per the message text, the example count has to match the label count.
84  REQUIRE(
86  "Number of training examples(%d) should be equal to number of labels "
87  "(%d)!\n",
89 
   // Per the message text, only dense features are accepted.
90  REQUIRE(
92  "LDA only works with dense features")
93 
   // Pick the template instantiation that matches the feature element type.
94  if(data->get_feature_type() == F_SHORTREAL)
95  return CLDA::train_machine_templated<float32_t>();
96  else if(data->get_feature_type() == F_DREAL)
97  return CLDA::train_machine_templated<float64_t>();
98  else if(data->get_feature_type() == F_LONGREAL)
99  return CLDA::train_machine_templated<floatmax_t>();
100 
   // Any other element type is not handled.
101  return false;
102 }
103 
// Chooses between the SVD-based and the classic solver for element type ST
// and returns the chosen solver's result.
// NOTE(review): the signature line (listing line 105) is absent from this
// extract.
104 template <typename ST>
106 {
107  index_t num_feat = ((CDenseFeatures<ST>*)features)->get_num_features();
108  index_t num_vec = features->get_num_vectors();
   // NOTE(review): the lone ';' below is an empty statement with no effect.
109  ;
110 
   // In AUTO_LDA mode the SVD solver is selected when there are no more
   // vectors than features.
111  bool lda_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat);
112 
   // SVD_LDA forces the SVD solver; every remaining case uses the classic one.
113  if (m_method == SVD_LDA || lda_more_efficient)
114  return solver_svd<ST>();
115  else
116  return solver_classic<ST>();
117 }
118 
// SVD-based training path: obtains a projection vector from
// LDACanVarSolver<ST>, orients it so that class_mean[1] projects higher than
// class_mean[0], stores it as the weight vector and sets the bias to minus
// the midpoint of the two projected class means. Always returns true.
// NOTE(review): the signature line (listing line 120) and listing line 127
// (presumably the `new LDACanVarSolver<ST>(` opener) are absent from this
// extract.
119 template <typename ST>
121 {
122  auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
123 
124  // keep just one dimension to do binary classification
125  const index_t projection_dim = 1;
   // NOTE(review): who owns the CMulticlassLabels created below is not
   // visible from this block - confirm the solver takes care of it.
126  auto solver = std::unique_ptr<LDACanVarSolver<ST>>(
128  dense_feat,
129  new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
130  projection_dim, m_gamma, m_bdc_svd));
131 
132  SGVector<ST> w_st(solver->get_eigenvectors());
133 
   // Project both class means onto the direction found by the solver.
   // Going by the variable names, index 0 is the negative class and index 1
   // the positive class.
134  auto class_mean = solver->get_class_mean();
135  ST m_neg = linalg::dot(w_st, class_mean[0]);
136  ST m_pos = linalg::dot(w_st, class_mean[1]);
137 
138  // change the sign of w if needed to get the correct labels
139  float64_t sign = (m_pos > m_neg) ? 1 : -1;
140 
   // The stored weights are always float64_t, whatever ST is.
141  SGVector<float64_t> w(dense_feat->get_num_features());
142  // copy w_st into w
143  for (index_t i = 0; i < w.size(); ++i)
144  w[i] = sign * w_st[i];
145  set_w(w);
146 
   // Bias is minus the midpoint of the two (sign-corrected) projected means.
147  set_bias(-0.5 * sign * (m_neg + m_pos));
148 
149  return true;
150 }
151 
// Classic training path: takes class means and the within-class covariance
// from LDASolver<ST>, Cholesky-factorises that matrix and uses the factor to
// solve for the weight vector and for the two per-class vectors that feed the
// bias. Always returns true.
// NOTE(review): the signature line (listing line 153) and listing line 176
// are absent from this extract; line 176 presumably declares w_st as the
// result of a linalg::cholesky_solver( call - confirm.
152 template <typename ST>
154 {
155  auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
156  index_t num_feat = dense_feat->get_num_features();
157 
   // NOTE(review): who owns the CMulticlassLabels created below is not
   // visible from this block - confirm the solver takes care of it.
158  auto solver = std::unique_ptr<LDASolver<ST>>(
159  new LDASolver<ST>(
160  dense_feat,
161  new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
162  m_gamma));
163 
164  auto class_mean = solver->get_class_mean();
   // NOTE(review): class_count is not read anywhere in the visible lines of
   // this function.
165  auto class_count = solver->get_class_count();
166  SGMatrix<ST> scatter_matrix = solver->get_within_cov();
167 
168  // the usual way
169  // we need to find a Basic Linear Solution of A.x=b for 'x'.
170  // Instead of crudely Inverting A, we go for solve() using Decompositions.
171  // where:
172  // MatrixXd A=scatter;
173  // VectorXd b=mean_pos-mean_neg;
174  // VectorXd x=w;
175  auto decomposition = linalg::cholesky_factor(scatter_matrix);
   // The right-hand side below combines class_mean[1] and class_mean[0] with
   // coefficients +1 and -1, i.e. the mean difference named `b` above.
177  decomposition,
178  linalg::add(class_mean[1], class_mean[0], (ST)1, (ST)-1));
179 
180  // get the weights w_neg(for -ve class) and w_pos(for +ve class)
181  auto w_neg = linalg::cholesky_solver(decomposition, class_mean[0]);
182  auto w_pos = linalg::cholesky_solver(decomposition, class_mean[1]);
183 
   // The stored weights are always float64_t, whatever ST is.
184  SGVector<float64_t> w(num_feat);
185  // copy w_st into w
186  for (index_t i = 0; i < w.size(); ++i)
187  w[i] = (float64_t)w_st[i];
188  set_w(w);
189 
190  // get the bias.
   // bias = 0.5 * (w_neg . mean_neg - w_pos . mean_pos)
191  set_bias(
192  (float64_t)(
193  0.5 * (linalg::dot(w_neg, class_mean[0]) -
194  linalg::dot(w_pos, class_mean[1]))));
195 
196  return true;
197 }
virtual const char * get_name() const =0
virtual bool train_machine(CFeatures *data=NULL)
Definition: LDA.cpp:64
virtual ELabelType get_label_type() const =0
binary labels +1/-1
Definition: LabelTypes.h:18
virtual void set_w(const SGVector< float64_t > src_w)
int32_t index_t
Definition: common.h:72
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
bool solver_svd()
Definition: LDA.cpp:120
virtual int32_t get_num_labels() const =0
SGMatrix< T > cholesky_factor(const SGMatrix< T > &A, const bool lower=true)
Definition: SGMatrix.h:25
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:436
void add(SGVector< T > &a, SGVector< T > &b, SGVector< T > &result, T alpha=1, T beta=1)
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:181
T dot(const SGVector< T > &a, const SGVector< T > &b)
CLDA(float64_t gamma=0, ELDAMethod method=AUTO_LDA, bool bdc_svd=true)
Definition: LDA.cpp:24
virtual void set_features(CDotFeatures *feat)
Definition: LDA.h:167
int32_t get_num_features() const
virtual CDotFeatures * get_features()
Features that support dot products among other operations.
Definition: DotFeatures.h:44
bool train_machine_templated()
Definition: LDA.cpp:105
Multiclass Labels for multi-class classification.
int32_t size() const
Definition: SGVector.h:156
SGVector< T > cholesky_solver(const SGMatrix< T > &L, const SGVector< T > &b, const bool lower=true)
double float64_t
Definition: common.h:60
bool m_bdc_svd
Definition: LDA.h:225
shogun vector
float64_t m_gamma
Definition: LDA.h:221
virtual EFeatureClass get_feature_class() const =0
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
ELDAMethod
Definition: LDA.h:26
bool solver_classic()
Definition: LDA.cpp:153
CDotFeatures * features
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual ~CLDA()
Definition: LDA.cpp:60
int machine_int_t
Definition: common.h:69
The class Features is the base class of all feature objects.
Definition: Features.h:69
void init()
Definition: LDA.cpp:46
ELDAMethod m_method
Definition: LDA.h:223
virtual void set_bias(float64_t b)
#define SG_ADD(...)
Definition: SGObject.h:93
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:72
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation