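/* Stochastic gradient descent SVM with a diagonal quasi-Newton scaling
 * (SGD-QN), after Bordes, Bottou and Gallinari, "SGD-QN: Careful
 * Quasi-Newton Stochastic Gradient Descent", JMLR 2009. */
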
#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
    init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
    SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
    // Ref the new loss before unrefing the old one, so that passing in
    // the currently set loss object does not destroy it.
    SG_REF(loss_func);
    SG_UNREF(loss);
    loss=loss_func;
}

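/* Accumulates into B a secant estimate of the inverse diagonal Hessian:
 * for each coordinate, the ratio of the weight change (W_1 - W) to the
 * corresponding change in the regularized gradient. Coordinates whose
 * weight did not move fall back to 1/lambda, the plain SGD gain. */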
void CSGDQN::compute_ratio(float64_t* W, float64_t* W_1, float64_t* B, float64_t* dst, int32_t dim, float64_t lambda, float64_t loss_val)
{
    for (int32_t i=0; i<dim; i++)
    {
        float64_t diffw=W_1[i]-W[i];
        if (diffw)
            B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
        else
            B[i]+=1/lambda;
    }
}

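/* Folds the fresh curvature estimates B into the running gains Bc as a
 * convex combination (weights c1, c2), skipping coordinates with no new
 * estimate (B[i]==0), and clips each gain to [v1, v2] to keep the
 * effective learning rates bounded. */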
void CSGDQN::combine_and_clip(float64_t* Bc, float64_t* B, int32_t dim, float64_t c1, float64_t c2, float64_t v1, float64_t v2)
{
    for (int32_t i=0; i<dim; i++)
    {
        if (B[i])
        {
            Bc[i]=Bc[i]*c1+B[i]*c2;
            Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
        }
    }
}

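/* Trains the linear SVM with SGD-QN: SGD steps scaled per dimension by
 * the gains Bc, a delayed regularization step every `skip` iterations,
 * and a gain update from a secant curvature estimate in the iteration
 * that follows each regularization step. */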
bool CSGDQN::train(CFeatures* data)
{
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    SG_FREE(w);
    w=SG_MALLOC(float64_t, w_dim);
    memset(w, 0, w_dim*sizeof(float64_t));

    float64_t lambda=1.0/(C1*num_vec);

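    // Shift t so the schedule starts at a reasonable learning rate:
    // eta0 is derived from the typical weight scale 1/sqrt(lambda), and
    // with Bc initialized to 1/lambda the first effective step size
    // Bc[i]/t equals eta0. This assumes |x| is roughly 1.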
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

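    // Per-dimension gains: Bc holds the current diagonal scaling
    // (initialized to 1/lambda, the gain of plain SGD), B collects fresh
    // curvature estimates, result is scratch space and w_1 stores the
    // previous weight vector.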
    float64_t* Bc=SG_MALLOC(float64_t, w_dim);
    CMath::fill_vector(Bc, w_dim, 1/lambda);

    float64_t* result=SG_MALLOC(float64_t, w_dim);
    float64_t* B=SG_MALLOC(float64_t, w_dim);
    float64_t* w_1=SG_MALLOC(float64_t, w_dim);

    // compute_ratio() accumulates into B, so it must start zeroed.
    memset(B, 0, w_dim*sizeof(float64_t));

    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

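    // Log-type losses have a nonzero derivative everywhere, so every
    // example triggers an update; margin losses only update when z < 1.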
    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

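    // Main loop: every `skip` examples a delayed regularization step is
    // applied, and the iteration after it refines the gains from the
    // observed gradient change (see compute_ratio/combine_and_clip).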
    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        bool updateB=false;
        for (int32_t i=0; i<num_vec; i++)
        {
            SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
            ASSERT(w_dim==v.vlen);
            float64_t eta = 1.0/t;
            float64_t y = labels->get_label(i);
            float64_t z = y * features->dense_dot(i, w, w_dim);
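            // Iteration right after a regularization step: take the scaled
            // gradient step and use the induced gradient change to update
            // the gains.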
            if (updateB==true)
            {
                if (z < 1 || is_log_loss)
                {
                    // Copy the previous w; assigning the pointer (w_1=w)
                    // would alias the two buffers, break the ratio below
                    // and double-free w at cleanup.
                    memcpy(w_1, w, w_dim*sizeof(float64_t));
                    float64_t loss_1=-loss->first_derivative(z,1);
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,eta*loss_1*y,result,1.0,w,w_dim);
                    float64_t z2 = y * features->dense_dot(i, w, w_dim);
                    float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
                    if (diffloss)
                    {
                        compute_ratio(w,w_1,B,v.vector,w_dim,lambda,y*diffloss);
                        if (t>skip)
                            combine_and_clip(Bc,B,w_dim,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
                        else
                            combine_and_clip(Bc,B,w_dim,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
                    }
                }
                updateB=false;
            }
            else
            {
                if (--count<=0)
                {
                    CMath::vector_multiply(result,Bc,w,w_dim);
                    CMath::add(w,-skip*lambda*eta,result,1.0,w,w_dim);
                    count = skip;
                    updateB=true;
                }

                if (z < 1 || is_log_loss)
                {
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,-eta*loss->first_derivative(z,1)*y,result,1.0,w,w_dim);
                }
            }
            t++;

            v.free_vector();
        }
    }
    SG_FREE(result);
    SG_FREE(w_1);
    SG_FREE(B);
    SG_FREE(Bc);

    return true;
}

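/* Estimates the average number of nonzero features per vector and sets
 * `skip`, the number of iterations between delayed regularization steps,
 * inversely proportional to the data density. */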
void CSGDQN::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

    int32_t n = 0;
    float64_t r = 0;

    for (int32_t j=0; j<num_vec; j++, n++)
        r += features->get_nnz_features_for_vector(j);

    ASSERT(r>0);

    // Compute in floating point: 16*n*c_dim can overflow int32 on large
    // problems.
    skip = (int32_t) ((16.0 * n * c_dim) / r);
}

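/* Sets defaults (C=1, 5 epochs, hinge loss) and registers the model
 * parameters for serialization. */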
void CSGDQN::init()
{
    t=0;
    C1=1;
    C2=1;
    epochs=5;
    skip=1000;
    count=1000;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&epochs, "epochs", "Number of training epochs.");
    m_parameters->add(&skip, "skip", "Number of iterations between regularization steps.");
    m_parameters->add(&count, "count", "Iterations left until the next regularization step.");
}