LibLinear.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVM_linear.h>
#include <shogun/classifier/svm/Tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}
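
/* Minimal usage sketch. The feature and label objects, and the setters used
 * here, are assumptions based on the class header (LibLinear.h) rather than
 * on this file:
 *
 *   CDotFeatures* feats = ...; // any dot-product capable feature object
 *   CLabels* labs = ...;       // two-class labels in {-1,+1}
 *
 *   CLibLinear* svm = new CLibLinear(1.0, feats, labs);
 *   svm->set_epsilon(1e-3);    // assumed setter for the epsilon parameter
 *   svm->train();              // dispatches to train_machine() below
 *   CLabels* out = svm->apply();
 *
 *   SG_UNREF(out);
 *   SG_UNREF(svm);
 */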

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

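    // SG_ADD registers each member with Shogun's parameter framework so it
    // gets serialized with the machine; MS_AVAILABLE additionally exposes
    // the parameter to automatic model selection, MS_NOT_AVAILABLE does not.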
    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
    m_linear_term.destroy_vector();
}

bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            (liblinear_solver_type == L1R_LR) )
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    SG_FREE(w);
    if (use_bias)
        w=SG_MALLOC(float64_t, num_feat+1);
    else
        w=SG_MALLOC(float64_t, num_feat);
    w_dim=num_feat;

    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*w_dim);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(int, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            SG_NOTIMPLEMENTED;
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
            break; // do not fall through to the unknown-solver error
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//      upper_bound_i = Cp if y_i = 1
//      upper_bound_i = Cn if y_i = -1
//      D_ii = 0
// In L2-SVM case:
//      upper_bound_i = INF
//      D_ii = 1/(2*Cp) if y_i = 1
//      D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
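//
// A sketch of the per-coordinate update implemented below: with
// w = \sum_j y_j \alpha_j x_j, the gradient of the dual objective along
// coordinate i is
//      G_i = y_i w^T x_i - 1 + D_ii \alpha_i,
// and the exact one-variable minimizer is the clipped Newton step
//      \alpha_i <- min(max(\alpha_i - G_i/(x_i^T x_i + D_ii), 0), upper_bound_i),
// where QD[i] in the code caches the curvature x_i^T x_i + D_ii.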

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
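// (GETI maps a label y[i] in {-1,+1} to an array index in {0,2}; the
// three-element arrays diag[], upper_bound[] and C[] therefore keep the
// negative-class constant in slot 0 and the positive-class constant in
// slot 2, with slot 1 unused.)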

void CLibLinear::solve_l2r_l1l2_svc(
            const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;
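
    // Shrinking: variables whose projected gradient indicates they are
    // pinned at a bound get swapped out of [0, active_size) and skipped;
    // once the active subproblem meets the tolerance, the full variable set
    // is restored for one verification pass (see the gap check at the
    // bottom of the main loop).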

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i, prob->x,i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
          break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n",iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n",v/2);
    SG_INFO("nSV = %d\n",nSV);

    // buffers allocated with SG_MALLOC must be released with SG_FREE
    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |w_j| + C \sum max(0, 1-y_i w^T x_i)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
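//
// Sketch of the per-coordinate Newton step used below: with G and H the
// gradient and (lower-bounded) Hessian of the loss along w_j, the
// subgradient conditions of the |w_j| term give the direction
//      d = -(G+1)/H   if G+1 <= H*w_j,
//      d = -(G-1)/H   if G-1 >= H*w_j,
//      d = -w_j       otherwise (w_j is driven exactly to zero),
// followed by a backtracking line search with parameter sigma.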

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
          break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers allocated with SG_MALLOC must be released with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |w_j| + C \sum log(1+exp(-y_i w^T x_i)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
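//
// Implementation note: the solver caches exp_wTx[i] = exp(w^T x_i) for every
// training point, so the logistic loss and its first and second derivatives
// along one coordinate j can be accumulated in a single pass over feature
// j's nonzeros; the line search updates the cache multiplicatively via
// exp_wTx[ind] *= exp(d*val) instead of recomputing w^T x_i from scratch.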

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
          break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers allocated with SG_MALLOC must be released with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    m_linear_term.destroy_vector();

    m_linear_term=SGVector<float64_t>(labels->get_num_labels());
    CMath::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}
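
// Note: with the all -1 linear term set up above, the dual objective in
// solve_l2r_l1l2_svc reduces to the standard SVM form (linear part
// -e^T \alpha); assigning a different vector before training reweights the
// per-example linear part of that objective (see the
// G = G*yi + m_linear_term.vector[i] branch in solve_l2r_l1l2_svc).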

#endif //HAVE_LAPACK