SHOGUN 4.1.0
SVMSGD.cpp
/*
   SVM with stochastic gradient
   Copyright (C) 2007- Leon Bottou

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $

   Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
*/

#include <shogun/classifier/svm/SVMSGD.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

CSVMSGD::CSVMSGD()
: CLinearMachine()
{
	init();
}

CSVMSGD::CSVMSGD(float64_t C)
: CLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
	set_labels(trainlab);
}

CSVMSGD::~CSVMSGD()
{
	SG_UNREF(loss);
}

void CSVMSGD::set_loss_function(CLossFunction* loss_func)
{
	SG_REF(loss_func);
	SG_UNREF(loss);
	loss=loss_func;
}
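
/* Illustrative sketch (not part of the original file): the loss can be
   swapped before training, e.g. assuming CLogLoss from
   <shogun/loss/LogLoss.h>:

     CSVMSGD* svm = new CSVMSGD(1.0);
     svm->set_loss_function(new CLogLoss());
*/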

bool CSVMSGD::train_machine(CFeatures* data)
{
	// allocate memory for w and initialize everything (w and bias) with 0
	ASSERT(m_labels)
	ASSERT(m_labels->get_label_type() == LT_BINARY)

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")
		set_features((CDotFeatures*) data);
	}

	ASSERT(features)

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels)
	ASSERT(num_vec>0)

	w=SGVector<float64_t>(features->get_dim_feature_space());
	w.zero();
	bias=0;

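	// The SVM cost C maps to the SGD regularizer via lambda = 1/(C1*num_vec),
	// i.e. the objective being minimized is
	// (lambda/2)*|w|^2 + (1/num_vec)*sum_i loss(y_i*(w.x_i + bias)).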
	float64_t lambda= 1.0/(C1*num_vec);

	// Shift t in order to have a
	// reasonable initial learning rate.
	// This assumes |x| \approx 1.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);
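	// The step size used below is eta_t = 1.0/(lambda*t); seeding the
	// counter with t = 1/(eta0*lambda) makes the first step size exactly eta0.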

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)

	//do the sgd
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec)
	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

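	// Hinge-like losses have zero derivative once the margin z = y*f(x)
	// reaches 1, so those examples are skipped below; log-type losses
	// have a nonzero gradient everywhere and update on every example.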
	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		for (int32_t i=0; i<num_vec; i++)
		{
			float64_t eta = 1.0 / (lambda * t);
			float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
			float64_t z = y * (features->dense_dot(i, w.vector, w.vlen) + bias);

			if (z < 1 || is_log_loss)
			{
				float64_t etd = -eta * loss->first_derivative(z,1);
				features->add_to_dense_vec(etd * y / wscale, i, w.vector, w.vlen);

				if (use_bias)
				{
					if (use_regularized_bias)
						bias *= 1 - eta * lambda * bscale;
					bias += etd * y * bscale;
				}
			}

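			// Amortized weight decay: rather than shrinking w by
			// (1 - eta*lambda) on every iteration, apply the accumulated
			// factor once every `skip` iterations (linearized when the
			// factor stays near 1, exact power otherwise).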
			if (--count <= 0)
			{
				float64_t r = 1 - eta * lambda * skip;
				if (r < 0.8)
					r = pow(1 - eta * lambda, skip);
				SGVector<float64_t>::scale_vector(r, w.vector, w.vlen);
				count = skip;
			}
			t++;
		}
	}

	float64_t wnorm = CMath::dot(w.vector, w.vector, w.vlen);
	SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias)

	return true;
}

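/* Illustrative usage sketch (not from this file; assumes the Shogun 4.x
   dense-feature and binary-label API):

     #include <shogun/features/DenseFeatures.h>

     SGMatrix<float64_t> X(dim, num);   // one example per column
     SGVector<float64_t> y(num);        // labels in {-1, +1}

     CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(X);
     CBinaryLabels* labs = new CBinaryLabels(y);

     CSVMSGD* svm = new CSVMSGD(1.0, feats, labs);
     svm->train();                          // runs train_machine() above
     CBinaryLabels* out = svm->apply_binary(feats);
*/
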
void CSVMSGD::calibrate()
{
	ASSERT(features)
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0)
	ASSERT(c_dim>0)

	float64_t* c=SG_MALLOC(float64_t, c_dim);
	memset(c, 0, c_dim*sizeof(float64_t));

	SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim)

	// compute average gradient size
	int32_t n = 0;
	float64_t m = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
	{
		r+=features->get_nnz_features_for_vector(j);
		features->add_to_dense_vec(1, j, c, c_dim, true);

		//waste cpu cycles for readability
		//(only changed dims need checking)
		m=CMath::max(c, c_dim);
	}

	// bias update scaling
	bscale = 0.5*m/n;

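	// r/n is the average number of nonzero features per example; choosing
	// skip = 16*c_dim/(r/n) amortizes one dense O(c_dim) decay over enough
	// sparse O(r/n) updates that the decay adds only a small constant cost.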
	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);
	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)

	SG_FREE(c);
}

void CSVMSGD::init()
{
	t=1;
	C1=1;
	C2=1;
	wscale=1;
	bscale=1;
	epochs=5;
	skip=1000;
	count=1000;
	use_bias=true;

	use_regularized_bias=false;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&wscale, "wscale", "W scale");
	m_parameters->add(&bscale, "bscale", "b scale");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
	m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
	m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}