LibLinearRegression.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Soeren Sonnenburg
 */

#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/regression/svr/LibLinearRegression.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/lib/Signal.h>

using namespace shogun;

CLibLinearRegression::CLibLinearRegression() :
    CLinearMachine()
{
    init_defaults();
}

CLibLinearRegression::CLibLinearRegression(float64_t C, CDotFeatures* feats, CLabels* labs) :
    CLinearMachine()
{
    init_defaults();
    set_C(C);
    set_features(feats);
    set_labels(labs);
}

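// A minimal usage sketch (not part of the original file): the feature and
// label types follow Shogun's 2.x API, while the matrix/label contents are
// hypothetical placeholders.
//
//   SGMatrix<float64_t> mat(num_feat, num_vec);   // filled elsewhere
//   SGVector<float64_t> lab(num_vec);             // filled elsewhere
//   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
//   CRegressionLabels* labs=new CRegressionLabels(lab);
//
//   CLibLinearRegression* svr=new CLibLinearRegression(1.0, feats, labs);
//   svr->set_liblinear_regression_type(L2R_L2LOSS_SVR_DUAL);
//   svr->train();
//   CRegressionLabels* preds=svr->apply_regression(feats);
//   SG_UNREF(preds);
//   SG_UNREF(svr);
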
void CLibLinearRegression::init_defaults()
{
    set_C(1.0);
    set_epsilon(1e-2);
    set_max_iter(10000);
    set_use_bias(false);
    set_liblinear_regression_type(L2R_L1LOSS_SVR_DUAL);
}

void CLibLinearRegression::register_parameters()
{
    SG_ADD(&m_C, "m_C", "regularization constant", MS_AVAILABLE);
    SG_ADD(&m_epsilon, "m_epsilon", "tolerance epsilon", MS_NOT_AVAILABLE);
    SG_ADD(&m_tube_epsilon, "m_tube_epsilon", "svr tube epsilon", MS_AVAILABLE);
    SG_ADD(&m_max_iter, "m_max_iter", "max number of iterations", MS_NOT_AVAILABLE);
    SG_ADD(&m_use_bias, "m_use_bias", "indicates whether bias should be used", MS_NOT_AVAILABLE);
}

CLibLinearRegression::~CLibLinearRegression()
{
}

bool CLibLinearRegression::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();

    if (data)
        set_features((CDotFeatures*)data);

    ASSERT(features);
    ASSERT(m_labels && m_labels->get_label_type()==LT_REGRESSION);

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    // with a bias term, one extra slot is allocated behind the weight
    // vector; w.vlen still covers only the actual features
    if (m_use_bias)
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    problem prob;
    if (m_use_bias)
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*w.vlen);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=m_use_bias;

    // copy the regression targets into the liblinear problem
    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CRegressionLabels*)m_labels)->get_label(i);

    switch (m_liblinear_regression_type)
    {
        case L2R_L2LOSS_SVR:
        {
            double* Cs = SG_MALLOC(double, prob.l);
            for(int i = 0; i < prob.l; i++)
                Cs[i] = m_C;

            function *fun_obj=new l2r_l2_svr_fun(&prob, Cs, m_tube_epsilon);
            CTron tron_obj(fun_obj, m_epsilon);
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            SG_FREE(Cs);
            break;
        }
        case L2R_L1LOSS_SVR_DUAL:
        case L2R_L2LOSS_SVR_DUAL:
            solve_l2r_l1l2_svr(&prob);
            break;
        default:
            SG_ERROR("Error: unknown regression type\n");
            break;
    }

    SG_FREE(prob.y);

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problems
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta + p \sum_{i=1}^l |\beta_i| - \sum_{i=1}^l y_i\beta_i,
//    s.t.     -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Q_ij = x_i^T x_j and diag(lambda) carries the per-instance
//  regularization terms
//
// In the L1-loss SVR case:
//      upper_bound_i = C
//      lambda_i = 0
// In the L2-loss SVR case:
//      upper_bound_i = INF
//      lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

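// Sketch of the per-coordinate update used below (a restatement of the
// algorithm, not from the original file): with all other variables fixed,
// the subproblem in beta_i is
//
//   min_d  0.5*H*d^2 + G*d + p*|beta_i + d|,
//
// where G = x_i^T w - y_i + lambda_i*beta_i and H = Q_ii + lambda_i. Its
// closed-form minimizer is the clipped Newton step
//
//   d = -(G+p)/H   if G+p < H*beta_i,
//   d = -(G-p)/H   if G-p > H*beta_i,
//   d = -beta_i    otherwise,
//
// after which beta_i+d is clipped into [-upper_bound_i, upper_bound_i].
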
#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)

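// A hypothetical sketch of what instance weighting would involve (not
// implemented here): with GETI(i) defined as (i), lambda and upper_bound
// below would have to become length-l arrays, e.g.
//
//   double* upper_bound = SG_MALLOC(double, l);
//   for (int k=0; k<l; k++)
//       upper_bound[k] = C*weight[k];   // weight[] is hypothetical
//
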
void CLibLinearRegression::solve_l2r_l1l2_svr(const problem *prob)
{
    int l = prob->l;
    double C = m_C;
    double p = m_tube_epsilon;
    int w_size = prob->n;
    double eps = m_epsilon;
    int i, s, iter = 0;
    int max_iter = m_max_iter; // honour the value registered via set_max_iter()
    int active_size = l;
    int *index = new int[l];

    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new, Gnorm1_new;
    double Gnorm1_init = 0.0;
    double *beta = new double[l];
    double *QD = new double[l];
    double *y = prob->y;

    // with a bias term, prob->n counts one extra variable; adjust w_size to
    // the plain feature dimension so that dense dot products use the right
    // length and the bias is kept in w.vector[w_size]
    if (prob->use_bias)
        w_size--;

    // defaults correspond to L2R_L2LOSS_SVR_DUAL
    double lambda[1], upper_bound[1];
    lambda[0] = 0.5/C;
    upper_bound[0] = CMath::INFTY;

    if(m_liblinear_regression_type == L2R_L1LOSS_SVR_DUAL)
    {
        lambda[0] = 0;
        upper_bound[0] = C;
    }

    // Initial beta can be set here. Note that
    // -upper_bound <= beta[i] <= upper_bound
    for(i=0; i<l; i++)
        beta[i] = 0;

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        QD[i] = prob->x->dot(i, prob->x, i);
        prob->x->add_to_dense_vec(beta[i], i, w.vector, w_size);

        if (prob->use_bias)
            w.vector[w_size]+=beta[i];

        index[i] = i;
    }

    while(iter < max_iter)
    {
        Gmax_new = 0;
        Gnorm1_new = 0;

        for(i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

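        // One pass over the (randomly permuted) active set: compute the
        // gradient G of each coordinate, use it to measure the violation of
        // the optimality conditions, shrink variables that are provably at
        // their bound out of the active set, and apply a clipped Newton step
        // to the rest.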
        for(s=0; s<active_size; s++)
        {
            i = index[s];
            G = -y[i] + lambda[GETI(i)]*beta[i];
            H = QD[i] + lambda[GETI(i)];

            G += prob->x->dense_dot(i, w.vector, w_size);
            if (prob->use_bias)
                G+=w.vector[w_size];

            double Gp = G+p;
            double Gn = G-p;
            double violation = 0;
            if(beta[i] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old && Gn<-Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] >= upper_bound[GETI(i)])
            {
                if(Gp > 0)
                    violation = Gp;
                else if(Gp < -Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] <= -upper_bound[GETI(i)])
            {
                if(Gn < 0)
                    violation = -Gn;
                else if(Gn > Gmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(beta[i] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);
            Gnorm1_new += violation;

            // obtain Newton direction d
            if(Gp < H*beta[i])
                d = -Gp/H;
            else if(Gn > H*beta[i])
                d = -Gn/H;
            else
                d = -beta[i];

            if(fabs(d) < 1.0e-12)
                continue;

            double beta_old = beta[i];
            beta[i] = CMath::min(CMath::max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
            d = beta[i]-beta_old;

            if(d != 0)
            {
                prob->x->add_to_dense_vec(d, i, w.vector, w_size);

                if (prob->use_bias)
                    w.vector[w_size]+=d;
            }
        }

        if(iter == 0)
            Gnorm1_init = Gnorm1_new;
        iter++;

        SG_SABS_PROGRESS(Gnorm1_new, -CMath::log10(Gnorm1_new), -CMath::log10(eps*Gnorm1_init), -CMath::log10(Gnorm1_init), 6);

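        // Stop once the total violation has fallen below eps times its value
        // after the first pass; if shrinking removed variables, first re-run
        // one pass over the full set to confirm convergence.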
        if(Gnorm1_new <= eps*Gnorm1_init)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("\noptimization finished, #iter = %d\n", iter);
    if(iter >= max_iter)
        SG_INFO("\nWARNING: reaching max number of iterations\nUsing L2R_L2LOSS_SVR (the primal solver) may be faster\n\n");

    // calculate objective value
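    // i.e. the dual objective
    //   0.5*||w||^2 + sum_i (p*|beta_i| - y_i*beta_i + 0.5*lambda_i*beta_i^2)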
    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    v = 0.5*v;
    for(i=0; i<l; i++)
    {
        v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
        if(beta[i] != 0)
            nSV++;
    }

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("nSV = %d\n", nSV);

    delete [] beta;
    delete [] QD;
    delete [] index;
}

#endif /* HAVE_LAPACK */