LibLinear.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include "lib/config.h"

#ifdef HAVE_LAPACK
#include "lib/io.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "base/Parameter.h"
#include "classifier/svm/LibLinear.h"
#include "classifier/svm/SVM_linear.h"
#include "classifier/svm/Tron.h"
#include "features/DotFeatures.h"

using namespace shogun;

CLibLinear::CLibLinear(void)
: CLinearClassifier()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearClassifier()
{
    init();
    liblinear_solver_type=l; // use the requested solver
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearClassifier()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;
    epsilon=1e-5;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    epsilon=1e-5; // default convergence tolerance
    set_max_iterations();
    m_linear_term=NULL;
    m_linear_term_len=0;

    m_parameters->add(&C1, "C1", "C Cost constant 1.");
    m_parameters->add(&C2, "C2", "C Cost constant 2.");
    m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
    m_parameters->add(&epsilon, "epsilon", "Convergence precision.");
    m_parameters->add(&max_iterations, "max_iterations", "Max number of iterations.");
    m_parameters->add_vector(&m_linear_term, &m_linear_term_len, "linear_term", "Linear Term");
    m_parameters->add((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type", "Type of LibLinear solver.");
}

CLibLinear::~CLibLinear()
{
    delete[] m_linear_term;
}
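
/* A minimal usage sketch (illustrative only; CSimpleFeatures, the CLabels
 * constructor and classify() are assumptions about the surrounding shogun
 * API of this era, not guaranteed by this file):
 *
 *   CDotFeatures* feats=new CSimpleFeatures<float64_t>(matrix, num_feat, num_vec);
 *   CLabels* labs=new CLabels(label_vector, num_vec);  // labels in {-1,+1}
 *   CLibLinear* svm=new CLibLinear(1.0, feats, labs);  // C1=C2=1.0
 *   svm->train();            // L2R_L1LOSS_SVC_DUAL by default (see init())
 *   CLabels* out=svm->classify();
 */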

bool CLibLinear::train(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            liblinear_solver_type == L1R_LR)
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    delete[] w;
    if (use_bias)
        w=new float64_t[num_feat+1];
    else
        w=new float64_t[num_feat];
    w_dim=num_feat;
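
    // Note: with use_bias, w has num_feat+1 slots; the first w_dim entries
    // hold the weights, and the extra slot w[w_dim] accumulates the bias,
    // which is written back via set_bias(w[w_dim]) after training.
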
    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*w_dim);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=new int[prob.l];
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            // assumes the features were already transposed (checked above)
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            // assumes the features were already transposed (checked above)
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            SG_NOTIMPLEMENTED;
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    delete[] prob.y;

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Q_ij = y_i y_j x_i^T x_j and
//  D is a diagonal matrix
//
// In the L1-SVM case:
//      upper_bound_i = Cp if y_i = 1
//      upper_bound_i = Cn if y_i = -1
//      D_ii = 0
// In the L2-SVM case:
//      upper_bound_i = INF
//      D_ii = 1/(2*Cp) if y_i = 1
//      D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)
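// (GETI maps y[i] in {-1,+1} to {0,2}, so the three-element tables diag[3],
// upper_bound[3] and C[3] below keep the negative-class constant in slot 0
// and the positive-class constant in slot 2; slot 1 is unused.)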

void CLibLinear::solve_l2r_l1l2_svc(
            const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = new double[l];
    int *index = new int[l];
    double *alpha = new double[l];
    int32_t *y = new int32_t[l];
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for (i=0; i<w_size; i++)
        w[i] = 0;

    for (i=0; i<l; i++)
    {
        alpha[i] = 0;
        if (prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;
        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }
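
    // QD[i] = D_ii + x_i.x_i is the constant diagonal of Q+D; it is the
    // denominator of the single-variable Newton step
    // alpha[i] <- clip(alpha[i] - G/QD[i], [0, C]) in the main loop below.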

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term)
                G = G*yi + m_linear_term[i];
            else
                G = G*yi - 1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

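            // Projected gradient: at the box boundary, only gradient
            // directions pointing into [0, C] count as violations; variables
            // whose gradient lies beyond the previous iteration's violation
            // range (PGmax_old/PGmin_old) are shrunk out of the active set.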
            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if (gap <= eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\n"
                "Using -s 2 may be faster (also see the LIBLINEAR FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for (i=0; i<w_size; i++)
        v += w[i]*w[i];
    for (i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if (alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2);
    SG_INFO("nSV = %d\n", nSV);

    delete [] QD;
    delete [] alpha;
    delete [] y;
    delete [] index;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum_j |w_j| + C \sum_i max(0, 1-y_i w^T x_i)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *b = new double[l]; // b[i] = 1 - y_i w^T x_i
    double *xj_sq = new double[w_size];

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        b[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for (j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }
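
    // xj_sq[j] = sum_i C_i*x_ij^2 (with x_ij = 1 for the bias column); it
    // feeds the cheap sufficient-decrease test (appxcond) in the line search
    // below, which can accept a step without recomputing the loss.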

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for (j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w[j])
                d = -Gp/H;
            else if (Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];
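            // (d minimizes the one-variable model
            //  |w[j]+d| - |w[j]| + G*d + 0.5*H*d*d: the first two cases are
            //  soft-thresholding with the right/left subgradients Gp and Gn,
            //  the third stops at the kink w[j]+d = 0.)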

            if (fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if (appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];
                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if (num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if (cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if the line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for (int i=0; i<l; i++)
                    b[i] = 1;

                for (int i=0; i<n; i++)
                {
                    if (w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
    {
        if (w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for (j=0; j<l; j++)
        if (b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] b;
    delete [] xj_sq;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum_j |w_j| + C \sum_i log(1+exp(-y_i w^T x_i)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support instance weights, redefine GETI(i) as (i)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = new int[w_size];
    int32_t *y = new int32_t[l];
    double *exp_wTx = new double[l]; // exp_wTx[i] caches exp(w^T x_i)
    double *exp_wTx_new = new double[l];
    double *xj_max = new double[w_size];
    double *C_sum = new double[w_size];
    double *xjneg_sum = new double[w_size];
    double *xjpos_sum = new double[w_size];

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for (j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }
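
    // Per-column statistics: xj_max[j] and C_sum[j] feed the approximate
    // line-search bounds appxcond1/appxcond2 (only usable when all feature
    // values are non-negative, hence the x_min tracking), while xjneg_sum[j]
    // enters the gradient as G = -sum2 + xjneg_sum[j].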

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for (j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w[j])
                d = -Gp/H;
            else if (Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if (fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if (x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if (CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if (cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if the line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for (int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for (int i=0; i<w_size; i++)
                {
                    if (w[i]==0)
                        continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for (int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
        if (w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for (j=0; j<l; j++)
        if (y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    delete [] index;
    delete [] y;
    delete [] exp_wTx;
    delete [] exp_wTx_new;
    delete [] xj_max;
    delete [] C_sum;
    delete [] xjneg_sum;
    delete [] xjpos_sum;
}

void CLibLinear::get_linear_term(float64_t** linear_term, int32_t* len)
{
    if (!m_linear_term_len || !m_linear_term)
        SG_ERROR("Please assign a linear term first!\n");

    *linear_term=(float64_t*) malloc(sizeof(float64_t)*m_linear_term_len);

    for (int32_t i=0; i<m_linear_term_len; i++)
        (*linear_term)[i]=m_linear_term[i];

    *len=m_linear_term_len; // out: number of entries copied
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    delete[] m_linear_term;

    m_linear_term_len=labels->get_num_labels();
    m_linear_term = new float64_t[m_linear_term_len];
    CMath::fill_vector(m_linear_term, m_linear_term_len, -1.0);
}
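
// (A linear term of all -1, as set here, recovers the standard SVM dual:
// it corresponds to the "G = G*yi - 1" branch in solve_l2r_l1l2_svc.)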

#endif //HAVE_LAPACK