SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LDA.cpp
Go to the documentation of this file.
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 2014 Abhijeet Kislay
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3
#include <shogun/lib/common.h>
#include <shogun/machine/Machine.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/classifier/LDA.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace Eigen;
using namespace shogun;
25 
26 CLDA::CLDA(float64_t gamma, ELDAMethod method)
28 {
29  init();
30  m_method=method;
31  m_gamma=gamma;
32 }
33 
35  CLabels *trainlab, ELDAMethod method)
36  :CLinearMachine(), m_gamma(gamma)
37 {
38  init();
39  set_features(traindat);
40  set_labels(trainlab);
41  m_method=method;
42  m_gamma=gamma;
43 }
44 
45 void CLDA::init()
46 {
48  m_gamma=0;
49  SG_ADD((machine_int_t*) &m_method, "m_method",
50  "Method used for LDA calculation", MS_NOT_AVAILABLE);
51  SG_ADD((machine_int_t*) &m_gamma, "m_gamma",
52  "Regularization parameter", MS_NOT_AVAILABLE);
53 }
54 
56 {
57 }
58 
60 {
61  REQUIRE(m_labels, "Labels for the given features are not specified!\n")
62  REQUIRE(m_labels->get_label_type()==LT_BINARY, "The labels should of type"
63  " CBinaryLabels! you provided %s \n",m_labels->get_name())
64 
65  if(data)
66  {
67  if(!data->has_property(FP_DOT))
68  SG_ERROR("Specified features are not of type CDotFeatures\n")
69  set_features((CDotFeatures*) data);
70  }
71 
72  REQUIRE(features, "Features are not provided!\n")
73  SGVector<int32_t>train_labels=((CBinaryLabels *)m_labels)->get_int_labels();
74  REQUIRE(train_labels.vector,"Provided Labels are empty!\n")
75 
77  ->get_feature_matrix();
78  int32_t num_feat=feature_matrix.num_rows;
79  int32_t num_vec=feature_matrix.num_cols;
80  REQUIRE(num_vec==train_labels.vlen,"Number of training examples(%d) should be "
81  "equal to number of labels specified(%d)!\n", num_vec, train_labels.vlen);
82 
83  SGVector<int32_t> classidx_neg(num_vec);
84  SGVector<int32_t> classidx_pos(num_vec);
85 
86  int32_t i=0;
87  int32_t num_neg=0;
88  int32_t num_pos=0;
89 
90  for(i=0; i<train_labels.vlen; i++)
91  {
92  if (train_labels.vector[i]==-1)
93  classidx_neg[num_neg++]=i;
94 
95  else if(train_labels.vector[i]==+1)
96  classidx_pos[num_pos++]=i;
97  }
98 
99  w=SGVector<float64_t>(num_feat);
100  w.zero();
101  MatrixXd fmatrix=Map<MatrixXd>(feature_matrix.matrix, num_feat, num_vec);
102  VectorXd mean_neg(num_feat);
103  mean_neg=VectorXd::Zero(num_feat);
104  VectorXd mean_pos(num_feat);
105  mean_pos=VectorXd::Zero(num_feat);
106 
107  //mean neg
108  for(i=0; i<num_neg; i++)
109  mean_neg+=fmatrix.col(classidx_neg[i]);
110  mean_neg/=(float64_t)num_neg;
111 
112  // get m(-ve) - mean(-ve)
113  for(i=0; i<num_neg; i++)
114  fmatrix.col(classidx_neg[i])-=mean_neg;
115 
116  //mean pos
117  for(i=0; i<num_pos; i++)
118  mean_pos+=fmatrix.col(classidx_pos[i]);
119  mean_pos/=(float64_t)num_pos;
120 
121  // get m(+ve) - mean(+ve)
122  for(i=0; i<num_pos; i++)
123  fmatrix.col(classidx_pos[i])-=mean_pos;
124 
125  SGMatrix<float64_t>scatter_matrix(num_feat, num_feat);
126  Map<MatrixXd> scatter(scatter_matrix.matrix, num_feat, num_feat);
127 
128  if (m_method == FLD_LDA || (m_method==AUTO_LDA && num_vec>num_feat))
129  {
130  // covariance matrix.
131  MatrixXd cov_mat(num_feat, num_feat);
132  cov_mat=fmatrix*fmatrix.transpose();
133  scatter=cov_mat/(num_vec-1);
134  float64_t trace=scatter.trace();
135  double s=1.0-m_gamma;
136  scatter *=s;
137  scatter.diagonal()+=VectorXd::Constant(num_feat, trace*m_gamma/num_feat);
138 
139  // the usual way
140  // we need to find a Basic Linear Solution of A.x=b for 'x'.
141  // Instead of crudely Inverting A, we go for solve() using Decompositions.
142  // where:
143  // MatrixXd A=scatter;
144  // VectorXd b=mean_pos-mean_neg;
145  // VectorXd x=w;
146  Map<VectorXd> x(w.vector, num_feat);
147  LLT<MatrixXd> decomposition(scatter);
148  x=decomposition.solve(mean_pos-mean_neg);
149 
150  // get the weights w_neg(for -ve class) and w_pos(for +ve class)
151  VectorXd w_neg=decomposition.solve(mean_neg);
152  VectorXd w_pos=decomposition.solve(mean_pos);
153 
154  // get the bias.
155  bias=0.5*(w_neg.dot(mean_neg)-w_pos.dot(mean_pos));
156  }
157 
158  else
159  {
160  //for algorithmic detail, please refer to section 16.3.1. of Bayesian
161  //Reasoning and Machine Learning by David Barber.
162 
163  //we will perform SVD here.
164  MatrixXd fmatrix1=Map<MatrixXd>(feature_matrix.matrix, num_feat, num_vec);
165 
166  // to hold the centered positive and negative class data
167  MatrixXd cen_pos(num_feat,num_pos);
168  MatrixXd cen_neg(num_feat,num_neg);
169 
170  for(i=0; i<num_pos;i++)
171  cen_pos.col(i)=fmatrix.col(classidx_pos[i]);
172 
173  for(i=0; i<num_neg;i++)
174  cen_neg.col(i)=fmatrix.col(classidx_neg[i]);
175 
176  //+ve covariance matrix
177  cen_pos=cen_pos*cen_pos.transpose()/(float64_t(num_pos-1));
178 
179  //-ve covariance matrix
180  cen_neg=cen_neg*cen_neg.transpose()/(float64_t(num_neg-1));
181 
182  //within class matrix
183  MatrixXd Sw= num_pos*cen_pos+num_neg*cen_neg;
184  float64_t trace=Sw.trace();
185  double s=1.0-m_gamma;
186  Sw *=s;
187  Sw.diagonal()+=VectorXd::Constant(num_feat, trace*m_gamma/num_feat);
188 
189  //total mean
190  VectorXd mean_total=(num_pos*mean_pos+num_neg*mean_neg)/(float64_t)num_vec;
191 
192  //between class matrix
193  MatrixXd Sb(num_feat,2);
194  Sb.col(0)=sqrt(num_pos)*(mean_pos-mean_total);
195  Sb.col(1)=sqrt(num_neg)*(mean_neg-mean_total);
196 
197  JacobiSVD<MatrixXd> svd(fmatrix1, ComputeThinU);
198 
199  // basis to represent the solution
200  MatrixXd Q=svd.matrixU();
201  // modified between class scatter
202  Sb=Q.transpose()*(Sb*(Sb.transpose()))*Q;
203 
204  // modified within class scatter
205  Sw=Q.transpose()*Sw*Q;
206 
207  // to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw))))
208  //1.get Cw=Chol(Sw)
209  //find the decomposition of Cw'.
210  HouseholderQR<MatrixXd> decomposition(Sw.llt().matrixU().transpose());
211 
212  //2.get P=inv(Cw')*Sb_new
213  //MatrixXd P=decomposition.solve(Sb);
214  //3. final value to be put in SVD will be therefore:
215  // final_ output =(inv(Cw')*(P'))'
216  JacobiSVD<MatrixXd> svd2(decomposition.solve((decomposition.solve(Sb))
217  .transpose()).transpose(), ComputeThinU);
218 
219  // Since this is a linear classifier, with only binary classes,
220  // we need to keep only the 1st eigenvector.
221  Map<VectorXd> x(w.vector, num_feat);
222  x=Q*(svd2.matrixU().col(0));
223  // get the bias
224  bias=(x.transpose()*mean_total);
225  bias=bias*(-1);
226  }
227  return true;
228 }
229 #endif//HAVE_EIGEN3

SHOGUN Machine Learning Toolbox - Documentation