LibLinear.cpp

00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2010 Soeren Sonnenburg
00008  * Copyright (c) 2007-2009 The LIBLINEAR Project.
00009  * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 #include <shogun/lib/config.h>
00012 
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/lib/Signal.h>
00015 #include <shogun/lib/Time.h>
00016 #include <shogun/base/Parameter.h>
00017 #include <shogun/classifier/svm/LibLinear.h>
00018 #include <shogun/optimization/liblinear/tron.h>
00019 #include <shogun/features/DotFeatures.h>
00020 #include <shogun/labels/BinaryLabels.h>
00021 
00022 using namespace shogun;
00023 
00024 CLibLinear::CLibLinear()
00025 : CLinearMachine()
00026 {
00027     init();
00028 }
00029 
00030 CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
00031 : CLinearMachine()
00032 {
00033     init();
00034     liblinear_solver_type=l;
00035 }
00036 
00037 CLibLinear::CLibLinear(
00038     float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00039 : CLinearMachine()
00040 {
00041     init();
00042     C1=C;
00043     C2=C;
00044     use_bias=true;
00045 
00046     set_features(traindat);
00047     set_labels(trainlab);
00048     init_linear_term();
00049 }
00050 
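// A minimal usage sketch, assuming the standard Shogun dense-feature API of
// this release (CDenseFeatures, CBinaryLabels, apply_binary - none of these
// names are defined in this file, so treat them as assumptions):
//
//   SGMatrix<float64_t> mat(num_feat, num_vec);   // one example per column
//   SGVector<float64_t> lab(num_vec);             // labels in {-1,+1}
//   // ... fill mat and lab ...
//   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
//   CBinaryLabels* labels=new CBinaryLabels(lab);
//   CLibLinear* svm=new CLibLinear(1.0, feats, labels);  // C1=C2=1, bias enabled
//   svm->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
//   svm->set_epsilon(1e-3);
//   svm->train();
//   CBinaryLabels* pred=svm->apply_binary(feats);
//   SG_UNREF(pred);
//   SG_UNREF(svm);
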
00051 void CLibLinear::init()
00052 {
00053     liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
00054     use_bias=false;
00055     C1=1;
00056     C2=1;
00057     set_max_iterations();
00058     epsilon=1e-5;
00059 
00060     SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
00061     SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
00062     SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
00063             MS_NOT_AVAILABLE);
00064     SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
00065     SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
00066             MS_NOT_AVAILABLE);
00067     SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
00068     SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
00069             "Type of LibLinear solver.", MS_NOT_AVAILABLE);
00070 }
00071 
00072 CLibLinear::~CLibLinear()
00073 {
00074 }
00075 
00076 bool CLibLinear::train_machine(CFeatures* data)
00077 {
00078     CSignal::clear_cancel();
00079     ASSERT(m_labels);
00080     ASSERT(m_labels->get_label_type() == LT_BINARY);
00081 
00082     if (data)
00083     {
00084         if (!data->has_property(FP_DOT))
00085             SG_ERROR("Specified features are not of type CDotFeatures\n");
00086 
00087         set_features((CDotFeatures*) data);
00088     }
00089     ASSERT(features);
00090 
00091 
00092     int32_t num_train_labels=m_labels->get_num_labels();
00093     int32_t num_feat=features->get_dim_feature_space();
00094     int32_t num_vec=features->get_num_vectors();
00095 
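    // Note: the L1-regularized solvers (L1R_L2LOSS_SVC, L1R_LR) iterate over
    // features rather than examples, so they expect the feature matrix to be
    // passed already transposed; num_feat and num_vec are swapped for them.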
00096     if (liblinear_solver_type == L1R_L2LOSS_SVC ||
00097             (liblinear_solver_type == L1R_LR) )
00098     {
00099         if (num_feat!=num_train_labels)
00100         {
00101             SG_ERROR("L1 methods require the data to be transposed: "
00102                     "number of features %d does not match number of "
00103                     "training labels %d\n",
00104                     num_feat, num_train_labels);
00105         }
00106         CMath::swap(num_feat, num_vec);
00107 
00108     }
00109     else
00110     {
00111         if (num_vec!=num_train_labels)
00112         {
00113             SG_ERROR("number of vectors %d does not match "
00114                     "number of training labels %d\n",
00115                     num_vec, num_train_labels);
00116         }
00117     }
00118     if (use_bias)
00119         w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat); // one extra element past vlen holds the bias (read via w[w.vlen] after training)
00120     else
00121         w=SGVector<float64_t>(num_feat);
00122 
00123     problem prob;
00124     if (use_bias)
00125     {
00126         prob.n=w.vlen+1;
00127         memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
00128     }
00129     else
00130     {
00131         prob.n=w.vlen;
00132         memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
00133     }
00134     prob.l=num_vec;
00135     prob.x=features;
00136     prob.y=SG_MALLOC(double, prob.l);
00137     float64_t* Cs=SG_MALLOC(double, prob.l);
00138     prob.use_bias=use_bias;
00139     double Cp=C1;
00140     double Cn=C2;
00141 
00142     for (int32_t i=0; i<prob.l; i++)
00143     {
00144         prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
00145         if (prob.y[i] == +1)
00146             Cs[i]=C1;
00147         else if (prob.y[i] == -1)
00148             Cs[i]=C2;
00149         else
00150             SG_ERROR("labels should be +1/-1 only\n");
00151     }
00152 
00153     int pos = 0;
00154     int neg = 0;
00155     for(int i=0;i<prob.l;i++)
00156     {
00157         if(prob.y[i]==+1)
00158             pos++;
00159     }
00160     neg = prob.l - pos;
00161 
00162     SG_INFO("%d training points %d dims\n", prob.l, prob.n);
00163 
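    // For the tron-based and L1-regularized solvers below, the user-supplied
    // epsilon is rescaled by min(#positives,#negatives)/l before being used as
    // the stopping tolerance; the dual solvers use epsilon unchanged.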
00164     function *fun_obj=NULL;
00165     switch (liblinear_solver_type)
00166     {
00167         case L2R_LR:
00168         {
00169             fun_obj=new l2r_lr_fun(&prob, Cs);
00170             CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
00171             SG_DEBUG("starting L2R_LR training via tron\n");
00172             tron_obj.tron(w.vector, m_max_train_time);
00173             SG_DEBUG("done with tron\n");
00174             delete fun_obj;
00175             break;
00176         }
00177         case L2R_L2LOSS_SVC:
00178         {
00179             fun_obj=new l2r_l2_svc_fun(&prob, Cs);
00180             CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
00181             tron_obj.tron(w.vector, m_max_train_time);
00182             delete fun_obj;
00183             break;
00184         }
00185         case L2R_L2LOSS_SVC_DUAL:
00186             solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
00187             break;
00188         case L2R_L1LOSS_SVC_DUAL:
00189             solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
00190             break;
00191         case L1R_L2LOSS_SVC:
00192         {
00193             //ASSUME FEATURES ARE TRANSPOSED ALREADY
00194             solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
00195             break;
00196         }
00197         case L1R_LR:
00198         {
00199             //ASSUME FEATURES ARE TRANSPOSED ALREADY
00200             solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
00201             break;
00202         }
00203         case L2R_LR_DUAL:
00204         {
00205             solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
00206             break;
00207         }
00208         default:
00209             SG_ERROR("Error: unknown solver_type\n");
00210             break;
00211     }
00212 
00213     if (use_bias)
00214         set_bias(w[w.vlen]);
00215     else
00216         set_bias(0);
00217 
00218     SG_FREE(prob.y);
00219     SG_FREE(Cs);
00220 
00221     return true;
00222 }
00223 
00224 // A coordinate descent algorithm for
00225 // L1-loss and L2-loss SVM dual problems
00226 //
00227 //  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
00228 //    s.t.      0 <= alpha_i <= upper_bound_i,
00229 //
00230 //  where Qij = yi yj xi^T xj and
00231 //  D is a diagonal matrix
00232 //
00233 // In L1-SVM case:
00234 //      upper_bound_i = Cp if y_i = 1
00235 //      upper_bound_i = Cn if y_i = -1
00236 //      D_ii = 0
00237 // In L2-SVM case:
00238 //      upper_bound_i = INF
00239 //      D_ii = 1/(2*Cp) if y_i = 1
00240 //      D_ii = 1/(2*Cn) if y_i = -1
00241 //
00242 // Given:
00243 // x, y, Cp, Cn
00244 // eps is the stopping tolerance
00245 //
00246 // solution will be put in w
00247 
00248 #undef GETI
00249 #define GETI(i) (y[i]+1)
00250 // To support weights for instances, use GETI(i) (i)
00251 
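// A sketch of the per-coordinate update performed in the main loop below
// (dual coordinate descent, cf. Hsieh et al., ICML 2008): for the selected
// index i with bound U_i = upper_bound[GETI(i)],
//
//   G        = y_i w^T x_i - 1 + D_ii alpha_i        (gradient of the dual objective)
//   alpha_i <- min(max(alpha_i - G/(Q_ii + D_ii), 0), U_i)
//   w       <- w + (alpha_i_new - alpha_i_old) y_i x_i
//
// where QD[i] caches Q_ii + D_ii = x_i^T x_i + D_ii, and GETI(i) = y_i + 1
// maps the labels -1/+1 to slots 0/2 of diag[] and upper_bound[].  When a
// custom linear term is set, the constant -1 is replaced by m_linear_term[i].
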
00252 void CLibLinear::solve_l2r_l1l2_svc(
00253             const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
00254 {
00255     int l = prob->l;
00256     int w_size = prob->n;
00257     int i, s, iter = 0;
00258     double C, d, G;
00259     double *QD = SG_MALLOC(double, l);
00260     int *index = SG_MALLOC(int, l);
00261     double *alpha = SG_MALLOC(double, l);
00262     int32_t *y = SG_MALLOC(int32_t, l);
00263     int active_size = l;
00264 
00265     // PG: projected gradient, for shrinking and stopping
00266     double PG;
00267     double PGmax_old = CMath::INFTY;
00268     double PGmin_old = -CMath::INFTY;
00269     double PGmax_new, PGmin_new;
00270 
00271     // default solver_type: L2R_L2LOSS_SVC_DUAL
00272     double diag[3] = {0.5/Cn, 0, 0.5/Cp};
00273     double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
00274     if(st == L2R_L1LOSS_SVC_DUAL)
00275     {
00276         diag[0] = 0;
00277         diag[2] = 0;
00278         upper_bound[0] = Cn;
00279         upper_bound[2] = Cp;
00280     }
00281 
00282     int n = prob->n;
00283 
00284     if (prob->use_bias)
00285         n--;
00286 
00287     for(i=0; i<w_size; i++)
00288         w[i] = 0;
00289 
00290     for(i=0; i<l; i++)
00291     {
00292         alpha[i] = 0;
00293         if(prob->y[i] > 0)
00294         {
00295             y[i] = +1;
00296         }
00297         else
00298         {
00299             y[i] = -1;
00300         }
00301         QD[i] = diag[GETI(i)];
00302 
00303         QD[i] += prob->x->dot(i, prob->x,i);
00304         index[i] = i;
00305     }
00306 
00307 
00308     CTime start_time;
00309     while (iter < max_iterations && !CSignal::cancel_computations())
00310     {
00311         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00312           break;
00313 
00314         PGmax_new = -CMath::INFTY;
00315         PGmin_new = CMath::INFTY;
00316 
00317         for (i=0; i<active_size; i++)
00318         {
00319             int j = i+rand()%(active_size-i);
00320             CMath::swap(index[i], index[j]);
00321         }
00322 
00323         for (s=0;s<active_size;s++)
00324         {
00325             i = index[s];
00326             int32_t yi = y[i];
00327 
00328             G = prob->x->dense_dot(i, w.vector, n);
00329             if (prob->use_bias)
00330                 G+=w.vector[n];
00331 
00332             if (m_linear_term.vector)
00333                 G = G*yi + m_linear_term.vector[i];
00334             else
00335                 G = G*yi-1;
00336 
00337             C = upper_bound[GETI(i)];
00338             G += alpha[i]*diag[GETI(i)];
00339 
00340             PG = 0;
00341             if (alpha[i] == 0)
00342             {
00343                 if (G > PGmax_old)
00344                 {
00345                     active_size--;
00346                     CMath::swap(index[s], index[active_size]);
00347                     s--;
00348                     continue;
00349                 }
00350                 else if (G < 0)
00351                     PG = G;
00352             }
00353             else if (alpha[i] == C)
00354             {
00355                 if (G < PGmin_old)
00356                 {
00357                     active_size--;
00358                     CMath::swap(index[s], index[active_size]);
00359                     s--;
00360                     continue;
00361                 }
00362                 else if (G > 0)
00363                     PG = G;
00364             }
00365             else
00366                 PG = G;
00367 
00368             PGmax_new = CMath::max(PGmax_new, PG);
00369             PGmin_new = CMath::min(PGmin_new, PG);
00370 
00371             if(fabs(PG) > 1.0e-12)
00372             {
00373                 double alpha_old = alpha[i];
00374                 alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
00375                 d = (alpha[i] - alpha_old)*yi;
00376 
00377                 prob->x->add_to_dense_vec(d, i, w.vector, n);
00378 
00379                 if (prob->use_bias)
00380                     w.vector[n]+=d;
00381             }
00382         }
00383 
00384         iter++;
00385         float64_t gap=PGmax_new - PGmin_new;
00386         SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
00387 
00388         if(gap <= eps)
00389         {
00390             if(active_size == l)
00391                 break;
00392             else
00393             {
00394                 active_size = l;
00395                 PGmax_old = CMath::INFTY;
00396                 PGmin_old = -CMath::INFTY;
00397                 continue;
00398             }
00399         }
00400         PGmax_old = PGmax_new;
00401         PGmin_old = PGmin_new;
00402         if (PGmax_old <= 0)
00403             PGmax_old = CMath::INFTY;
00404         if (PGmin_old >= 0)
00405             PGmin_old = -CMath::INFTY;
00406     }
00407 
00408     SG_DONE();
00409     SG_INFO("optimization finished, #iter = %d\n",iter);
00410     if (iter >= max_iterations)
00411     {
00412         SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
00413                 "(also see liblinear FAQ)\n\n");
00414     }
00415 
00416     // calculate objective value
00417 
00418     double v = 0;
00419     int nSV = 0;
00420     for(i=0; i<w_size; i++)
00421         v += w.vector[i]*w.vector[i];
00422     for(i=0; i<l; i++)
00423     {
00424         v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
00425         if(alpha[i] > 0)
00426             ++nSV;
00427     }
00428     SG_INFO("Objective value = %lf\n",v/2);
00429     SG_INFO("nSV = %d\n",nSV);
00430 
00431     SG_FREE(QD);
00432     SG_FREE(alpha);
00433     SG_FREE(y);
00434     SG_FREE(index);
00435 }
00436 
00437 // A coordinate descent algorithm for
00438 // L1-regularized L2-loss support vector classification
00439 //
00440 //  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
00441 //
00442 // Given:
00443 // x, y, Cp, Cn
00444 // eps is the stopping tolerance
00445 //
00446 // solution will be put in w
00447 
00448 #undef GETI
00449 #define GETI(i) (y[i]+1)
00450 // To support weights for instances, use GETI(i) (i)
00451 
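// A sketch of the per-feature step performed below: with b_i = 1 - y_i w^T x_i
// (only margin violators with b_i > 0 contribute), the loss derivatives for
// coordinate j are
//
//   G_j = -2 * sum_{i: b_i > 0} C_i y_i x_ij b_i
//   H_j =  2 * sum_{i: b_i > 0} C_i x_ij^2
//
// and the step d is the soft-thresholded Newton direction for |w_j| + loss
// (d = -(G_j+1)/H_j, -(G_j-1)/H_j or -w_j), followed by a backtracking line
// search that halves d until the exact objective decrease condition holds.
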
00452 void CLibLinear::solve_l1r_l2_svc(
00453     problem *prob_col, double eps, double Cp, double Cn)
00454 {
00455     int l = prob_col->l;
00456     int w_size = prob_col->n;
00457     int j, s, iter = 0;
00458     int active_size = w_size;
00459     int max_num_linesearch = 20;
00460 
00461     double sigma = 0.01;
00462     double d, G_loss, G, H;
00463     double Gmax_old = CMath::INFTY;
00464     double Gmax_new;
00465     double Gmax_init=0;
00466     double d_old, d_diff;
00467     double loss_old=0, loss_new;
00468     double appxcond, cond;
00469 
00470     int *index = SG_MALLOC(int, w_size);
00471     int32_t *y = SG_MALLOC(int32_t, l);
00472     double *b = SG_MALLOC(double, l); // b = 1-ywTx
00473     double *xj_sq = SG_MALLOC(double, w_size);
00474 
00475     CDotFeatures* x = (CDotFeatures*) prob_col->x;
00476     void* iterator;
00477     int32_t ind;
00478     float64_t val;
00479 
00480     double C[3] = {Cn,0,Cp};
00481 
00482     int n = prob_col->n;
00483     if (prob_col->use_bias)
00484         n--;
00485 
00486     for(j=0; j<l; j++)
00487     {
00488         b[j] = 1;
00489         if(prob_col->y[j] > 0)
00490             y[j] = 1;
00491         else
00492             y[j] = -1;
00493     }
00494 
00495     for(j=0; j<w_size; j++)
00496     {
00497         w.vector[j] = 0;
00498         index[j] = j;
00499         xj_sq[j] = 0;
00500 
00501         if (use_bias && j==n)
00502         {
00503             for (ind=0; ind<l; ind++)
00504                 xj_sq[n] += C[GETI(ind)];
00505         }
00506         else
00507         {
00508             iterator=x->get_feature_iterator(j);
00509             while (x->get_next_feature(ind, val, iterator))
00510                 xj_sq[j] += C[GETI(ind)]*val*val;
00511             x->free_feature_iterator(iterator);
00512         }
00513     }
00514 
00515 
00516     CTime start_time;
00517     while (iter < max_iterations && !CSignal::cancel_computations())
00518     {
00519         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00520           break;
00521 
00522         Gmax_new  = 0;
00523 
00524         for(j=0; j<active_size; j++)
00525         {
00526             int i = j+rand()%(active_size-j);
00527             CMath::swap(index[i], index[j]);
00528         }
00529 
00530         for(s=0; s<active_size; s++)
00531         {
00532             j = index[s];
00533             G_loss = 0;
00534             H = 0;
00535 
00536             if (use_bias && j==n)
00537             {
00538                 for (ind=0; ind<l; ind++)
00539                 {
00540                     if(b[ind] > 0)
00541                     {
00542                         double tmp = C[GETI(ind)]*y[ind];
00543                         G_loss -= tmp*b[ind];
00544                         H += tmp*y[ind];
00545                     }
00546                 }
00547             }
00548             else
00549             {
00550                 iterator=x->get_feature_iterator(j);
00551 
00552                 while (x->get_next_feature(ind, val, iterator))
00553                 {
00554                     if(b[ind] > 0)
00555                     {
00556                         double tmp = C[GETI(ind)]*val*y[ind];
00557                         G_loss -= tmp*b[ind];
00558                         H += tmp*val*y[ind];
00559                     }
00560                 }
00561                 x->free_feature_iterator(iterator);
00562             }
00563 
00564             G_loss *= 2;
00565 
00566             G = G_loss;
00567             H *= 2;
00568             H = CMath::max(H, 1e-12);
00569 
00570             double Gp = G+1;
00571             double Gn = G-1;
00572             double violation = 0;
00573             if(w.vector[j] == 0)
00574             {
00575                 if(Gp < 0)
00576                     violation = -Gp;
00577                 else if(Gn > 0)
00578                     violation = Gn;
00579                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
00580                 {
00581                     active_size--;
00582                     CMath::swap(index[s], index[active_size]);
00583                     s--;
00584                     continue;
00585                 }
00586             }
00587             else if(w.vector[j] > 0)
00588                 violation = fabs(Gp);
00589             else
00590                 violation = fabs(Gn);
00591 
00592             Gmax_new = CMath::max(Gmax_new, violation);
00593 
00594             // obtain Newton direction d
00595             if(Gp <= H*w.vector[j])
00596                 d = -Gp/H;
00597             else if(Gn >= H*w.vector[j])
00598                 d = -Gn/H;
00599             else
00600                 d = -w.vector[j];
00601 
00602             if(fabs(d) < 1.0e-12)
00603                 continue;
00604 
00605             double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
00606             d_old = 0;
00607             int num_linesearch;
00608             for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
00609             {
00610                 d_diff = d_old - d;
00611                 cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
00612 
00613                 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
00614                 if(appxcond <= 0)
00615                 {
00616                     if (use_bias && j==n)
00617                     {
00618                         for (ind=0; ind<l; ind++)
00619                             b[ind] += d_diff*y[ind];
00620                         break;
00621                     }
00622                     else
00623                     {
00624                         iterator=x->get_feature_iterator(j);
00625                         while (x->get_next_feature(ind, val, iterator))
00626                             b[ind] += d_diff*val*y[ind];
00627 
00628                         x->free_feature_iterator(iterator);
00629                         break;
00630                     }
00631                 }
00632 
00633                 if(num_linesearch == 0)
00634                 {
00635                     loss_old = 0;
00636                     loss_new = 0;
00637 
00638                     if (use_bias && j==n)
00639                     {
00640                         for (ind=0; ind<l; ind++)
00641                         {
00642                             if(b[ind] > 0)
00643                                 loss_old += C[GETI(ind)]*b[ind]*b[ind];
00644                             double b_new = b[ind] + d_diff*y[ind];
00645                             b[ind] = b_new;
00646                             if(b_new > 0)
00647                                 loss_new += C[GETI(ind)]*b_new*b_new;
00648                         }
00649                     }
00650                     else
00651                     {
00652                         iterator=x->get_feature_iterator(j);
00653                         while (x->get_next_feature(ind, val, iterator))
00654                         {
00655                             if(b[ind] > 0)
00656                                 loss_old += C[GETI(ind)]*b[ind]*b[ind];
00657                             double b_new = b[ind] + d_diff*val*y[ind];
00658                             b[ind] = b_new;
00659                             if(b_new > 0)
00660                                 loss_new += C[GETI(ind)]*b_new*b_new;
00661                         }
00662                         x->free_feature_iterator(iterator);
00663                     }
00664                 }
00665                 else
00666                 {
00667                     loss_new = 0;
00668                     if (use_bias && j==n)
00669                     {
00670                         for (ind=0; ind<l; ind++)
00671                         {
00672                             double b_new = b[ind] + d_diff*y[ind];
00673                             b[ind] = b_new;
00674                             if(b_new > 0)
00675                                 loss_new += C[GETI(ind)]*b_new*b_new;
00676                         }
00677                     }
00678                     else
00679                     {
00680                         iterator=x->get_feature_iterator(j);
00681                         while (x->get_next_feature(ind, val, iterator))
00682                         {
00683                             double b_new = b[ind] + d_diff*val*y[ind];
00684                             b[ind] = b_new;
00685                             if(b_new > 0)
00686                                 loss_new += C[GETI(ind)]*b_new*b_new;
00687                         }
00688                         x->free_feature_iterator(iterator);
00689                     }
00690                 }
00691 
00692                 cond = cond + loss_new - loss_old;
00693                 if(cond <= 0)
00694                     break;
00695                 else
00696                 {
00697                     d_old = d;
00698                     d *= 0.5;
00699                     delta *= 0.5;
00700                 }
00701             }
00702 
00703             w.vector[j] += d;
00704 
00705             // recompute b[] if line search takes too many steps
00706             if(num_linesearch >= max_num_linesearch)
00707             {
00708                 SG_INFO("#");
00709                 for(int i=0; i<l; i++)
00710                     b[i] = 1;
00711 
00712                 for(int i=0; i<n; i++)
00713                 {
00714                     if(w.vector[i]==0)
00715                         continue;
00716 
00717                     iterator=x->get_feature_iterator(i);
00718                     while (x->get_next_feature(ind, val, iterator))
00719                         b[ind] -= w.vector[i]*val*y[ind];
00720                     x->free_feature_iterator(iterator);
00721                 }
00722 
00723                 if (use_bias && w.vector[n])
00724                 {
00725                     for (ind=0; ind<l; ind++)
00726                         b[ind] -= w.vector[n]*y[ind];
00727                 }
00728             }
00729         }
00730 
00731         if(iter == 0)
00732             Gmax_init = Gmax_new;
00733         iter++;
00734 
00735         SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
00736                 -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
00737 
00738         if(Gmax_new <= eps*Gmax_init)
00739         {
00740             if(active_size == w_size)
00741                 break;
00742             else
00743             {
00744                 active_size = w_size;
00745                 Gmax_old = CMath::INFTY;
00746                 continue;
00747             }
00748         }
00749 
00750         Gmax_old = Gmax_new;
00751     }
00752 
00753     SG_DONE();
00754     SG_INFO("optimization finished, #iter = %d\n", iter);
00755     if(iter >= max_iterations)
00756         SG_WARNING("\nWARNING: reaching max number of iterations\n");
00757 
00758     // calculate objective value
00759 
00760     double v = 0;
00761     int nnz = 0;
00762     for(j=0; j<w_size; j++)
00763     {
00764         if(w.vector[j] != 0)
00765         {
00766             v += fabs(w.vector[j]);
00767             nnz++;
00768         }
00769     }
00770     for(j=0; j<l; j++)
00771         if(b[j] > 0)
00772             v += C[GETI(j)]*b[j]*b[j];
00773 
00774     SG_INFO("Objective value = %lf\n", v);
00775     SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);
00776 
00777     SG_FREE(index);
00778     SG_FREE(y);
00779     SG_FREE(b);
00780     SG_FREE(xj_sq);
00781 }
00782 
00783 // A coordinate descent algorithm for
00784 // L1-regularized logistic regression problems
00785 //
00786 //  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
00787 //
00788 // Given:
00789 // x, y, Cp, Cn
00790 // eps is the stopping tolerance
00791 //
00792 // solution will be put in w
00793 
00794 #undef GETI
00795 #define GETI(i) (y[i]+1)
00796 // To support weights for instances, use GETI(i) (i)
00797 
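// A sketch of the quantities maintained below: exp_wTx[i] caches exp(w^T x_i),
// so that the gradient and Hessian of the logistic loss along coordinate j are
//
//   G_j = -sum_i C_i x_ij / (1 + exp_wTx[i]) + sum_{i: y_i = -1} C_i x_ij
//   H_j =  sum_i C_i x_ij^2 exp_wTx[i] / (1 + exp_wTx[i])^2
//
// (xjneg_sum[j] precomputes the second term of G_j).  The L1 penalty is then
// handled by the same soft-thresholded Newton direction and backtracking line
// search as in solve_l1r_l2_svc above.
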
00798 void CLibLinear::solve_l1r_lr(
00799     const problem *prob_col, double eps,
00800     double Cp, double Cn)
00801 {
00802     int l = prob_col->l;
00803     int w_size = prob_col->n;
00804     int j, s, iter = 0;
00805     int active_size = w_size;
00806     int max_num_linesearch = 20;
00807 
00808     double x_min = 0;
00809     double sigma = 0.01;
00810     double d, G, H;
00811     double Gmax_old = CMath::INFTY;
00812     double Gmax_new;
00813     double Gmax_init=0;
00814     double sum1, appxcond1;
00815     double sum2, appxcond2;
00816     double cond;
00817 
00818     int *index = SG_MALLOC(int, w_size);
00819     int32_t *y = SG_MALLOC(int32_t, l);
00820     double *exp_wTx = SG_MALLOC(double, l);
00821     double *exp_wTx_new = SG_MALLOC(double, l);
00822     double *xj_max = SG_MALLOC(double, w_size);
00823     double *C_sum = SG_MALLOC(double, w_size);
00824     double *xjneg_sum = SG_MALLOC(double, w_size);
00825     double *xjpos_sum = SG_MALLOC(double, w_size);
00826 
00827     CDotFeatures* x = prob_col->x;
00828     void* iterator;
00829     int ind;
00830     double val;
00831 
00832     double C[3] = {Cn,0,Cp};
00833 
00834     int n = prob_col->n;
00835     if (prob_col->use_bias)
00836         n--;
00837 
00838     for(j=0; j<l; j++)
00839     {
00840         exp_wTx[j] = 1;
00841         if(prob_col->y[j] > 0)
00842             y[j] = 1;
00843         else
00844             y[j] = -1;
00845     }
00846     for(j=0; j<w_size; j++)
00847     {
00848         w.vector[j] = 0;
00849         index[j] = j;
00850         xj_max[j] = 0;
00851         C_sum[j] = 0;
00852         xjneg_sum[j] = 0;
00853         xjpos_sum[j] = 0;
00854 
00855         if (use_bias && j==n)
00856         {
00857             for (ind=0; ind<l; ind++)
00858             {
00859                 x_min = CMath::min(x_min, 1.0);
00860                 xj_max[j] = CMath::max(xj_max[j], 1.0);
00861                 C_sum[j] += C[GETI(ind)];
00862                 if(y[ind] == -1)
00863                     xjneg_sum[j] += C[GETI(ind)];
00864                 else
00865                     xjpos_sum[j] += C[GETI(ind)];
00866             }
00867         }
00868         else
00869         {
00870             iterator=x->get_feature_iterator(j);
00871             while (x->get_next_feature(ind, val, iterator))
00872             {
00873                 x_min = CMath::min(x_min, val);
00874                 xj_max[j] = CMath::max(xj_max[j], val);
00875                 C_sum[j] += C[GETI(ind)];
00876                 if(y[ind] == -1)
00877                     xjneg_sum[j] += C[GETI(ind)]*val;
00878                 else
00879                     xjpos_sum[j] += C[GETI(ind)]*val;
00880             }
00881             x->free_feature_iterator(iterator);
00882         }
00883     }
00884 
00885     CTime start_time;
00886     while (iter < max_iterations && !CSignal::cancel_computations())
00887     {
00888         if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00889           break;
00890 
00891         Gmax_new = 0;
00892 
00893         for(j=0; j<active_size; j++)
00894         {
00895             int i = j+rand()%(active_size-j);
00896             CMath::swap(index[i], index[j]);
00897         }
00898 
00899         for(s=0; s<active_size; s++)
00900         {
00901             j = index[s];
00902             sum1 = 0;
00903             sum2 = 0;
00904             H = 0;
00905 
00906             if (use_bias && j==n)
00907             {
00908                 for (ind=0; ind<l; ind++)
00909                 {
00910                     double exp_wTxind = exp_wTx[ind];
00911                     double tmp1 = 1.0/(1+exp_wTxind);
00912                     double tmp2 = C[GETI(ind)]*tmp1;
00913                     double tmp3 = tmp2*exp_wTxind;
00914                     sum2 += tmp2;
00915                     sum1 += tmp3;
00916                     H += tmp1*tmp3;
00917                 }
00918             }
00919             else
00920             {
00921                 iterator=x->get_feature_iterator(j);
00922                 while (x->get_next_feature(ind, val, iterator))
00923                 {
00924                     double exp_wTxind = exp_wTx[ind];
00925                     double tmp1 = val/(1+exp_wTxind);
00926                     double tmp2 = C[GETI(ind)]*tmp1;
00927                     double tmp3 = tmp2*exp_wTxind;
00928                     sum2 += tmp2;
00929                     sum1 += tmp3;
00930                     H += tmp1*tmp3;
00931                 }
00932                 x->free_feature_iterator(iterator);
00933             }
00934 
00935             G = -sum2 + xjneg_sum[j];
00936 
00937             double Gp = G+1;
00938             double Gn = G-1;
00939             double violation = 0;
00940             if(w.vector[j] == 0)
00941             {
00942                 if(Gp < 0)
00943                     violation = -Gp;
00944                 else if(Gn > 0)
00945                     violation = Gn;
00946                 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
00947                 {
00948                     active_size--;
00949                     CMath::swap(index[s], index[active_size]);
00950                     s--;
00951                     continue;
00952                 }
00953             }
00954             else if(w.vector[j] > 0)
00955                 violation = fabs(Gp);
00956             else
00957                 violation = fabs(Gn);
00958 
00959             Gmax_new = CMath::max(Gmax_new, violation);
00960 
00961             // obtain Newton direction d
00962             if(Gp <= H*w.vector[j])
00963                 d = -Gp/H;
00964             else if(Gn >= H*w.vector[j])
00965                 d = -Gn/H;
00966             else
00967                 d = -w.vector[j];
00968 
00969             if(fabs(d) < 1.0e-12)
00970                 continue;
00971 
00972             d = CMath::min(CMath::max(d,-10.0),10.0);
00973 
00974             double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
00975             int num_linesearch;
00976             for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
00977             {
00978                 cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
00979 
00980                 if(x_min >= 0)
00981                 {
00982                     double tmp = exp(d*xj_max[j]);
00983                     appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
00984                     appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
00985                     if(CMath::min(appxcond1,appxcond2) <= 0)
00986                     {
00987                         if (use_bias && j==n)
00988                         {
00989                             for (ind=0; ind<l; ind++)
00990                                 exp_wTx[ind] *= exp(d);
00991                         }
00992 
00993                         else
00994                         {
00995                             iterator=x->get_feature_iterator(j);
00996                             while (x->get_next_feature(ind, val, iterator))
00997                                 exp_wTx[ind] *= exp(d*val);
00998                             x->free_feature_iterator(iterator);
00999                         }
01000                         break;
01001                     }
01002                 }
01003 
01004                 cond += d*xjneg_sum[j];
01005 
01006                 int i = 0;
01007 
01008                 if (use_bias && j==n)
01009                 {
01010                     for (ind=0; ind<l; ind++)
01011                     {
01012                         double exp_dx = exp(d);
01013                         exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
01014                         cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
01015                         i++;
01016                     }
01017                 }
01018                 else
01019                 {
01020 
01021                     iterator=x->get_feature_iterator(j);
01022                     while (x->get_next_feature(ind, val, iterator))
01023                     {
01024                         double exp_dx = exp(d*val);
01025                         exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
01026                         cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
01027                         i++;
01028                     }
01029                     x->free_feature_iterator(iterator);
01030                 }
01031 
01032                 if(cond <= 0)
01033                 {
01034                     i = 0;
01035                     if (use_bias && j==n)
01036                     {
01037                         for (ind=0; ind<l; ind++)
01038                         {
01039                             exp_wTx[ind] = exp_wTx_new[i];
01040                             i++;
01041                         }
01042                     }
01043                     else
01044                     {
01045                         iterator=x->get_feature_iterator(j);
01046                         while (x->get_next_feature(ind, val, iterator))
01047                         {
01048                             exp_wTx[ind] = exp_wTx_new[i];
01049                             i++;
01050                         }
01051                         x->free_feature_iterator(iterator);
01052                     }
01053                     break;
01054                 }
01055                 else
01056                 {
01057                     d *= 0.5;
01058                     delta *= 0.5;
01059                 }
01060             }
01061 
01062             w.vector[j] += d;
01063 
01064             // recompute exp_wTx[] if line search takes too many steps
01065             if(num_linesearch >= max_num_linesearch)
01066             {
01067                 SG_INFO("#");
01068                 for(int i=0; i<l; i++)
01069                     exp_wTx[i] = 0;
01070 
01071                 for(int i=0; i<w_size; i++)
01072                 {
01073                     if(w.vector[i]==0) continue;
01074 
01075                     if (use_bias && i==n)
01076                     {
01077                         for (ind=0; ind<l; ind++)
01078                             exp_wTx[ind] += w.vector[i];
01079                     }
01080                     else
01081                     {
01082                         iterator=x->get_feature_iterator(i);
01083                         while (x->get_next_feature(ind, val, iterator))
01084                             exp_wTx[ind] += w.vector[i]*val;
01085                         x->free_feature_iterator(iterator);
01086                     }
01087                 }
01088 
01089                 for(int i=0; i<l; i++)
01090                     exp_wTx[i] = exp(exp_wTx[i]);
01091             }
01092         }
01093 
01094         if(iter == 0)
01095             Gmax_init = Gmax_new;
01096         iter++;
01097         SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
01098 
01099         if(Gmax_new <= eps*Gmax_init)
01100         {
01101             if(active_size == w_size)
01102                 break;
01103             else
01104             {
01105                 active_size = w_size;
01106                 Gmax_old = CMath::INFTY;
01107                 continue;
01108             }
01109         }
01110 
01111         Gmax_old = Gmax_new;
01112     }
01113 
01114     SG_DONE();
01115     SG_INFO("optimization finished, #iter = %d\n", iter);
01116     if(iter >= max_iterations)
01117         SG_WARNING("\nWARNING: reaching max number of iterations\n");
01118 
01119     // calculate objective value
01120 
01121     double v = 0;
01122     int nnz = 0;
01123     for(j=0; j<w_size; j++)
01124         if(w.vector[j] != 0)
01125         {
01126             v += fabs(w.vector[j]);
01127             nnz++;
01128         }
01129     for(j=0; j<l; j++)
01130         if(y[j] == 1)
01131             v += C[GETI(j)]*log(1+1/exp_wTx[j]);
01132         else
01133             v += C[GETI(j)]*log(1+exp_wTx[j]);
01134 
01135     SG_INFO("Objective value = %lf\n", v);
01136     SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);
01137 
01138     SG_FREE(index);
01139     SG_FREE(y);
01140     SG_FREE(exp_wTx);
01141     SG_FREE(exp_wTx_new);
01142     SG_FREE(xj_max);
01143     SG_FREE(C_sum);
01144     SG_FREE(xjneg_sum);
01145     SG_FREE(xjpos_sum);
01146 }
01147 
01148 // A coordinate descent algorithm for 
01149 // the dual of L2-regularized logistic regression problems
01150 //
01151 //  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
01152 //    s.t.      0 <= \alpha_i <= upper_bound_i,
01153 // 
01154 //  where Qij = yi yj xi^T xj and 
01155 //  upper_bound_i = Cp if y_i = 1
01156 //  upper_bound_i = Cn if y_i = -1
01157 //
01158 // Given: 
01159 // x, y, Cp, Cn
01160 // eps is the stopping tolerance
01161 //
01162 // solution will be put in w
01163 //
01164 // See Algorithm 5 of Yu et al., MLJ 2010
01165 
01166 #undef GETI
01167 #define GETI(i) (y[i]+1)
01168 // To support weights for instances, use GETI(i) (i)
01169 
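// A sketch of the per-example subproblem solved below (Algorithm 5 of Yu et
// al., MLJ 2010): with a = x_i^T x_i, b = y_i w^T x_i and C = upper_bound[GETI(i)],
// one of the paired variables alpha[2i], alpha[2i+1] (selected via sign s = +-1)
// is updated by minimizing
//
//   g(z) = z log z + (C-z) log(C-z) + 0.5*a*(z - alpha_old)^2 + s*b*(z - alpha_old)
//
// over 0 < z < C with a few Newton steps using
//
//   g'(z)  = a*(z - alpha_old) + s*b + log(z/(C-z))
//   g''(z) = a + C/(z*(C-z))
//
// after which w is updated by s*(z - alpha_old)*y_i*x_i.
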
01170 void CLibLinear::solve_l2r_lr_dual(const problem *prob, double eps, double Cp, double Cn)
01171 {
01172     int l = prob->l;
01173     int w_size = prob->n;
01174     int i, s, iter = 0;
01175     double *xTx = new double[l];
01176     int max_iter = 1000;
01177     int *index = new int[l];        
01178     double *alpha = new double[2*l]; // store alpha and C - alpha
01179     int32_t *y = SG_MALLOC(int32_t, l);
01180     int max_inner_iter = 100; // for inner Newton
01181     double innereps = 1e-2; 
01182     double innereps_min = CMath::min(1e-8, eps);
01183     double upper_bound[3] = {Cn, 0, Cp};
01184     double Gmax_init = 0;
01185 
01186     for(i=0; i<l; i++)
01187     {
01188         if(prob->y[i] > 0)
01189         {
01190             y[i] = +1; 
01191         }
01192         else
01193         {
01194             y[i] = -1;
01195         }
01196     }
01197         
01198     // Initial alpha can be set here. Note that
01199     // 0 < alpha[i] < upper_bound[GETI(i)]
01200     // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
01201     for(i=0; i<l; i++)
01202     {
01203         alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
01204         alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
01205     }
01206 
01207     for(i=0; i<w_size; i++)
01208         w[i] = 0;
01209     for(i=0; i<l; i++)
01210     {
01211         xTx[i] = prob->x->dot(i, prob->x,i);
01212         prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);
01213 
01214         if (prob->use_bias)
01215         {
01216             w.vector[w_size]+=y[i]*alpha[2*i];
01217             xTx[i]+=1;
01218         }
01219         index[i] = i;
01220     }
01221 
01222     while (iter < max_iter)
01223     {
01224         for (i=0; i<l; i++)
01225         {
01226             int j = i+rand()%(l-i);
01227             CMath::swap(index[i], index[j]);
01228         }
01229         int newton_iter = 0;
01230         double Gmax = 0;
01231         for (s=0; s<l; s++)
01232         {
01233             i = index[s];
01234             int32_t yi = y[i];
01235             double C = upper_bound[GETI(i)];
01236             double ywTx = 0, xisq = xTx[i];
01237 
01238             ywTx = prob->x->dense_dot(i, w.vector, w_size);
01239             if (prob->use_bias)
01240                 ywTx+=w.vector[w_size];
01241 
01242             ywTx *= y[i];
01243             double a = xisq, b = ywTx;
01244 
01245             // Decide to minimize g_1(z) or g_2(z)
01246             int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
01247             if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) 
01248             {
01249                 ind1 = 2*i+1;
01250                 ind2 = 2*i;
01251                 sign = -1;
01252             }
01253 
01254             //  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
01255             double alpha_old = alpha[ind1];
01256             double z = alpha_old;
01257             if(C - z < 0.5 * C) 
01258                 z = 0.1*z;
01259             double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
01260             Gmax = CMath::max(Gmax, CMath::abs(gp));
01261 
01262             // Newton method on the sub-problem
01263             const double eta = 0.1; // xi in the paper
01264             int inner_iter = 0;
01265             while (inner_iter <= max_inner_iter) 
01266             {
01267                 if(fabs(gp) < innereps)
01268                     break;
01269                 double gpp = a + C/(C-z)/z;
01270                 double tmpz = z - gp/gpp;
01271                 if(tmpz <= 0) 
01272                     z *= eta;
01273                 else // tmpz in (0, C)
01274                     z = tmpz;
01275                 gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
01276                 newton_iter++;
01277                 inner_iter++;
01278             }
01279 
01280             if(inner_iter > 0) // update w
01281             {
01282                 alpha[ind1] = z;
01283                 alpha[ind2] = C-z;
01284 
01285                 prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);
01286 
01287                 if (prob->use_bias)
01288                     w.vector[w_size]+=sign*(z-alpha_old)*yi;
01289             }
01290         }
01291 
01292         if(iter == 0)
01293             Gmax_init = Gmax;
01294         iter++;
01295 
01296         SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
01297 
01298         if(Gmax < eps) 
01299             break;
01300 
01301         if(newton_iter <= l/10) 
01302             innereps = CMath::max(innereps_min, 0.1*innereps);
01303 
01304     }
01305 
01306     SG_DONE();
01307     SG_INFO("optimization finished, #iter = %d\n",iter);
01308     if (iter >= max_iter)
01309         SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
01310 
01311     // calculate objective value
01312     
01313     double v = 0;
01314     for(i=0; i<w_size; i++)
01315         v += w[i] * w[i];
01316     v *= 0.5;
01317     for(i=0; i<l; i++)
01318         v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) 
01319             - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
01320     SG_INFO("Objective value = %lf\n", v);
01321 
01322     delete [] xTx;
01323     delete [] alpha;
01324     SG_FREE(y);
01325     delete [] index;
01326 }
01327 
01328 
01329 void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
01330 {
01331     if (!m_labels)
01332         SG_ERROR("Please assign labels first!\n");
01333 
01334     int32_t num_labels=m_labels->get_num_labels();
01335 
01336     if (num_labels!=linear_term.vlen)
01337     {
01338         SG_ERROR("Number of labels (%d) does not match number"
01339                 " of entries (%d) in linear term \n", num_labels,
01340                 linear_term.vlen);
01341     }
01342 
01343     m_linear_term=linear_term;
01344 }
01345 
01346 SGVector<float64_t> CLibLinear::get_linear_term()
01347 {
01348     if (!m_linear_term.vlen || !m_linear_term.vector)
01349         SG_ERROR("Please assign linear term first!\n");
01350 
01351     return m_linear_term;
01352 }
01353 
01354 void CLibLinear::init_linear_term()
01355 {
01356     if (!m_labels)
01357         SG_ERROR("Please assign labels first!\n");
01358 
01359     m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
01360     SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0); // default -1 entries reproduce the standard SVM dual linear term -e
01361 }