SGDQN.cpp

/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
    init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
    SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
    // ref the new loss before unref'ing the old one, so that setting
    // the already-assigned loss object does not destroy it
    SG_REF(loss_func);
    SG_UNREF(loss);
    loss=loss_func;
}

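/* Per-coordinate secant estimate of the inverse Hessian diagonal: each
   coordinate accumulates into B the ratio of the weight change to the
   corresponding gradient change, diffw/(lambda*diffw + loss_val*dst[i]).
   Coordinates with no weight change fall back to 1/lambda, the exact
   value for a pure L2 regularizer. */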
void CSGDQN::compute_ratio(float64_t* W,float64_t* W_1,float64_t* B,float64_t* dst,int32_t dim,float64_t lambda,float64_t loss_val)
{
    for (int32_t i=0; i<dim; i++)
    {
        float64_t diffw=W_1[i]-W[i];
        if (diffw)
            B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
        else
            B[i]+=1/lambda;
    }
}

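/* Maintains Bc as a running average of the secant estimates,
   Bc[i] = c1*Bc[i] + c2*B[i], clipped to [v1,v2] so that the
   per-coordinate step sizes stay within a factor of 100 of 1/lambda. */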
void CSGDQN::combine_and_clip(float64_t* Bc,float64_t* B,int32_t dim,float64_t c1,float64_t c2,float64_t v1,float64_t v2)
{
    for (int32_t i=0; i<dim; i++)
    {
        if (B[i])
        {
            Bc[i] = Bc[i]*c1 + B[i]*c2;
            Bc[i] = CMath::min(CMath::max(Bc[i],v1),v2);
        }
    }
}

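/* SGD-QN training (Bordes, Bottou and Gallinari, JMLR 2009): stochastic
   gradient descent in which each coordinate of the gradient is scaled by
   Bc, a clipped running estimate of the inverse diagonal Hessian.
   Iterations alternate between scaled gradient steps and secant updates
   of that estimate. */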
bool CSGDQN::train(CFeatures* data)
{
    ASSERT(m_labels);
    ASSERT(m_labels->get_label_type() == LT_BINARY);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    w=SGVector<float64_t>(features->get_dim_feature_space());
    w.zero();

    float64_t lambda= 1.0/(C1*num_vec);

    // Shift t in order to have a
    // reasonable initial learning rate.
    // This assumes |x| \approx 1.
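    // Setting t = 1/(eta0*lambda) and Bc = 1/lambda below makes the
    // first effective step size (1/t)*Bc equal to eta0.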
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

    float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
    SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);

    float64_t* result=SG_MALLOC(float64_t, w.vlen);
    float64_t* B=SG_MALLOC(float64_t, w.vlen);
    // B accumulates the secant estimates in compute_ratio and must start
    // from zero; SG_MALLOC does not zero the buffer
    SGVector<float64_t>::fill_vector(B, w.vlen, 0.0);

    // estimate the sparsity-dependent decay interval `skip`
    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

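    // Log-type losses have a nonzero gradient everywhere, so every example
    // triggers an update below; margin-based losses update only when z < 1.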
    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

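    /* Each epoch sweeps once over the training vectors. Every `skip`
       iterations a dense weight decay step is applied and updateB is set;
       the following iteration then refreshes B from the observed change
       in w before folding it into the running average Bc. */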
    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        bool updateB=false;
        for (int32_t i=0; i<num_vec; i++)
        {
            SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
            ASSERT(w.vlen==v.vlen);
            float64_t eta = 1.0/t;
            float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
            float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
            if (updateB)
            {
                if (z < 1 || is_log_loss)
                {
                    SGVector<float64_t> w_1=w.clone();
                    float64_t loss_1=-loss->first_derivative(z,1);
                    SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,eta*loss_1*y,result,1.0,w.vector,w.vlen);
                    float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
                    float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
                    if (diffloss)
                    {
                        compute_ratio(w.vector,w_1.vector,B,v.vector,w.vlen,lambda,y*diffloss);
                        if (t>skip)
                            combine_and_clip(Bc,B,w.vlen,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
                        else
                            combine_and_clip(Bc,B,w.vlen,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
                    }
                }
                updateB=false;
            }
            else
            {
                if (--count<=0)
                {
                    SGVector<float64_t>::vector_multiply(result,Bc,w.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,-skip*lambda*eta,result,1.0,w.vector,w.vlen);
                    count = skip;
                    updateB=true;
                }

                if (z < 1 || is_log_loss)
                {
                    SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,-eta*y*loss->first_derivative(z,1),result,1.0,w.vector,w.vlen);
                }
            }
            t++;
        }
    }
    SG_FREE(result);
    SG_FREE(B);
    SG_FREE(Bc);

    return true;
}
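/* Estimates the average sparsity of the training vectors to choose skip,
   the number of iterations between two applications of the dense weight
   decay step w <- w - skip*lambda*eta*Bc*w performed in train(). */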
void CSGDQN::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

    int32_t n = 0;
    float64_t r = 0;

    for (int32_t j=0; j<num_vec; j++, n++)
        r += features->get_nnz_features_for_vector(j);

    // apply the dense weight decay only once every `skip` iterations:
    // with an average of r/n nonzero features per vector, this sets
    // skip = (16*c_dim)/(r/n)
    skip = (int32_t) ((16 * n * c_dim) / r);
}

void CSGDQN::init()
{
    t=0;
    C1=1;
    C2=1;
    epochs=5;
    skip=1000;
    count=1000;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&epochs, "epochs", "Number of training epochs.");
    m_parameters->add(&skip, "skip", "Number of iterations between two weight decay steps.");
    m_parameters->add(&count, "count", "Iterations left until the next weight decay step.");
}
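
A minimal usage sketch follows; it is not part of this file. It assumes the Shogun API of this period: CDenseFeatures, CBinaryLabels, init_shogun_with_defaults/exit_shogun, and apply_binary from the machine base class; set_epochs is assumed to be exposed by SGDQN.h, and the data is made up for illustration. Check the headers of your version before relying on these names.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/SGDQN.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // four 2-dimensional training vectors, stored column-wise (assumed toy data)
    SGMatrix<float64_t> X(2, 4);
    X(0,0)=-1; X(1,0)=-1;  X(0,1)=-2; X(1,1)=-1;
    X(0,2)= 1; X(1,2)= 1;  X(0,3)= 2; X(1,3)= 1;
    SGVector<float64_t> y(4);
    y[0]=-1; y[1]=-1; y[2]=1; y[3]=1;

    CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
    CBinaryLabels* labels=new CBinaryLabels(y);

    CSGDQN* svm=new CSGDQN(1.0, feats, labels); // C1=C2=1
    svm->set_epochs(5);
    svm->train();

    // classify the training data and release everything
    CBinaryLabels* out=svm->apply_binary(feats);
    SG_UNREF(out);
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}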