SHOGUN 4.1.0
OnlineSVMSGD.cpp
/*
   SVM with stochastic gradient
   Copyright (C) 2007- Leon Bottou

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $

   Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
*/

#include <shogun/classifier/svm/OnlineSVMSGD.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/mathematics/Math.h>     // CMath::max, CMath::dot
#include <shogun/lib/SGVector.h>         // SGVector::scale_vector

using namespace shogun;

COnlineSVMSGD::COnlineSVMSGD()
: COnlineLinearMachine()
{
	init();
}

COnlineSVMSGD::COnlineSVMSGD(float64_t C)
: COnlineLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

COnlineSVMSGD::COnlineSVMSGD(float64_t C, CStreamingDotFeatures* traindat)
: COnlineLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
}

COnlineSVMSGD::~COnlineSVMSGD()
{
	SG_UNREF(loss);
}

void COnlineSVMSGD::set_loss_function(CLossFunction* loss_func)
{
	// ref the new loss before unref'ing the old one, so the swap is
	// safe even if loss_func and loss are the same object
	SG_REF(loss_func);
	SG_UNREF(loss);
	loss=loss_func;
}

bool COnlineSVMSGD::train(CFeatures* data)
{
	if (data)
	{
		if (!data->has_property(FP_STREAMING_DOT))
			SG_ERROR("Specified features are not of type CStreamingDotFeatures\n")
		set_features((CStreamingDotFeatures*) data);
	}

	features->start_parser();

	// allocate memory for w and initialize w and bias with 0
	ASSERT(features)
	ASSERT(features->get_has_labels())
	if (w)
		SG_FREE(w);
	w_dim=1;
	w=new float32_t;
	bias=0;

	// Shift t in order to have a
	// reasonable initial learning rate.
	// This assumes |x| \approx 1.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);
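	// (Since the loop below uses eta = 1/(lambda*t), starting the clock
	// at t = 1/(eta0*lambda) makes the first step size exactly eta0,
	// after which it decays as 1/t.)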

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)

	// do the sgd
	calibrate();
	if (features->is_seekable())
		features->reset_stream();

	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

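	// Per example the loop below takes the step
	//   w += eta * (-dloss(z)/dz) * y * x,   with z = y*(w.x + bias);
	// margin losses skip the step once z >= 1, while log-type losses
	// (detected above) update on every example.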
	int32_t vec_count;
	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		vec_count=0;
		count = skip;
		while (features->get_next_example())
		{
			vec_count++;
			// Expand w vector if more features are seen in this example
			features->expand_if_required(w, w_dim);

			float64_t eta = 1.0 / (lambda * t);
			float64_t y = features->get_label();
			float64_t z = y * (features->dense_dot(w, w_dim) + bias);

			if (z < 1 || is_log_loss)
			{
				float64_t etd = -eta * loss->first_derivative(z,1);
				features->add_to_dense_vec(etd * y / wscale, w, w_dim);

				if (use_bias)
				{
					if (use_regularized_bias)
						bias *= 1 - eta * lambda * bscale;
					bias += etd * y * bscale;
				}
			}
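
			// The l2 term would scale w by (1 - eta*lambda) after every
			// example; touching all of w each step is wasteful, so the
			// decay is amortized and applied once every 'skip' examples.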
			if (--count <= 0)
			{
				float32_t r = 1 - eta * lambda * skip;
				if (r < 0.8)
					r = pow(1 - eta * lambda, skip);
				SGVector<float32_t>::scale_vector(r, w, w_dim);
				count = skip;
			}
			t++;

			features->release_example();
		}

		// If the stream is seekable, reset the stream to the first
		// example (for epochs > 1)
		if (features->is_seekable() && e < epochs-1)
			features->reset_stream();
		else
			break;
	}

	features->end_parser();
	float64_t wnorm = CMath::dot(w, w, w_dim);
	SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias)

	return true;
}

void COnlineSVMSGD::calibrate(int32_t max_vec_num)
{
	int32_t c_dim=1;
	float32_t* c=SG_CALLOC(float32_t, c_dim);

	// compute average gradient size
	int32_t n = 0;
	float64_t m = 0;
	float64_t r = 0;

	while (features->get_next_example())
	{
		// Expand c if more features are seen in this example
		features->expand_if_required(c, c_dim);

		r += features->get_nnz_features_for_vector();
		features->add_to_dense_vec(1, c, c_dim, true);

		// waste cpu cycles for readability
		// (only changed dims need checking)
		m=CMath::max(c, c_dim);
		n++;

		features->release_example();
		if (n>=max_vec_num || m > 1000)
			break;
	}

	SG_PRINT("Online SGD calibrated using %d vectors.\n", n)

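	// c has accumulated |x| per coordinate over n examples, so m/n
	// estimates the magnitude of the largest single feature; the bias
	// update scale is set relative to it.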
	// bias update scaling
	bscale = 0.5*m/n;

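	// r/n is the average number of nonzero features per calibration
	// example, so the skip below works out to 16*c_dim/(r/n): one
	// O(c_dim) weight decay per roughly 16 dense vectors' worth of
	// sparse updates.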
	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);

	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)

	SG_FREE(c);
}

void COnlineSVMSGD::init()
{
	t=1;
	C1=1;
	C2=1;
	lambda=1e-4;
	wscale=1;
	bscale=1;
	epochs=1;
	skip=1000;
	count=1000;
	use_bias=true;

	use_regularized_bias=false;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&lambda, "lambda", "Regularization parameter.");
	m_parameters->add(&wscale, "wscale", "W scale");
	m_parameters->add(&bscale, "bscale", "b scale");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
	m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
	m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}
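
For context, a minimal training sketch (not part of this file): it follows the pattern of Shogun's streaming examples, the header paths are those of Shogun 4.1, and "train.data" is a placeholder for a labelled file in SVMLight format.

#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// stream labelled examples from disk ("train.data" is a placeholder)
	CStreamingAsciiFile* file = new CStreamingAsciiFile("train.data");
	SG_REF(file);
	CStreamingSparseFeatures<float32_t>* feats =
		new CStreamingSparseFeatures<float32_t>(file, true, 1024);
	SG_REF(feats);

	// build the online SVM with C=1.0 and attach the stream; train()
	// calibrates on the stream, then runs 'epochs' SGD passes
	COnlineSVMSGD* svm = new COnlineSVMSGD(1.0, feats);
	SG_REF(svm);
	svm->train();

	SG_UNREF(svm);
	SG_UNREF(feats);
	SG_UNREF(file);
	exit_shogun();
	return 0;
}
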
SHOGUN Machine Learning Toolbox - Documentation