LibLinearMTL.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Christian Widmer
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <vector>
#include <map> // std::map is used for the sparse task similarity rows below

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/transfer/multitask/LibLinearMTL.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
    init();
}

CLibLinearMTL::CLibLinearMTL(
        float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
}

void CLibLinearMTL::init()
{
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
}

CLibLinearMTL::~CLibLinearMTL()
{
}

bool CLibLinearMTL::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    m_labels->ensure_valid();

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    problem prob;
    prob.n = use_bias ? num_feat+1 : num_feat;
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=use_bias;

    // weight buffer, including the optional bias term
    float64_t* training_w=SG_MALLOC(float64_t, prob.n);
    memset(training_w, 0, sizeof(float64_t)*prob.n);

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CBinaryLabels*) m_labels)->get_label(i);

    int32_t pos=0;
    int32_t neg=0;
    for (int32_t i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg=prob.l-pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);
    SG_INFO("%d positives, %d negatives\n", pos, neg);

    double Cp=C1;
    double Cn=C2;
    solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);
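
    // note: solve_l2r_l1l2_svc writes the per-task parameter vectors into the
    // member matrix V (accessible via get_W()) and does not touch training_w,
    // which therefore stays zero below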

    if (use_bias)
        set_bias(training_w[num_feat]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    w = SGVector<float64_t>(num_feat);
    for (int32_t i=0; i<num_feat; i++)
        w[i] = training_w[i];
    SG_FREE(training_w);

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Q_ij = y_i y_j x_i^T x_j and
//  D is a diagonal matrix
//
// In L1-SVM case:
//      upper_bound_i = Cp if y_i = 1
//      upper_bound_i = Cn if y_i = -1
//      D_ii = 0
// In L2-SVM case:
//      upper_bound_i = INF
//      D_ii = 1/(2*Cp) if y_i = 1
//      D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// in this multitask variant, the solution is put in the columns of V
// (one weight vector per task) rather than in a single w

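// A sketch of the resulting single-variable update: with all other alphas
// fixed, the subproblem in alpha_i is a one-dimensional quadratic, minimized
// in closed form by a projected Newton step
//
//   G        = (Q alpha)_i + D_ii alpha_i - 1
//   alpha_i <- min(max(alpha_i - G / (Q_ii + D_ii), 0), upper_bound_i)
//
// which is exactly the update applied in the main loop below, with QD[i]
// caching Q_ii + D_ii.
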
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
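// note: for labels y[i] in {-1, +1}, GETI(i) evaluates to 0 or 2, so the
// three-element diag[] and upper_bound[] arrays below can be indexed
// directly by label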

void CLibLinearMTL::solve_l2r_l1l2_svc(const problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // matrix of per-task weight vectors, one column per task
    V = SGMatrix<float64_t>(w_size,num_tasks);

    // keep the alphas as a member so the dual objective can be computed later
    alphas = SGVector<float64_t>(l);

    // diag and upper_bound realize the D matrix and box constraints from the
    // problem statement above; the L2-loss values (L2R_L2LOSS_SVC_DUAL) are
    // set up first and then unconditionally overwritten with the L1-loss SVC
    // settings, i.e. D_ii = 0 and 0 <= alpha_i <= C
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (true)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    // set V to zero
    for (int32_t k=0; k<w_size*num_tasks; k++)
    {
        V.matrix[k] = 0;
    }

    // init alphas
    for (i=0; i<l; i++)
    {
        alphas[i] = 0;
    }

    for (i=0; i<l; i++)
    {
        if (prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;

        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        // random permutation of the active set
        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];
            int32_t ti = task_indicator_lhs[i];
            C = upper_bound[GETI(i)];

            // we compute the inner sum by looping over tasks;
            // this update is the main result of MTL_DCD
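            //
            // a sketch of the quantity computed here: with M the task
            // similarity matrix, t_i the task of example i and v_s the
            // weight vector stored in column s of V, the gradient is
            //
            //   G_i = y_i * sum_s M[t_i, s] * <v_s, x_i> - 1
            //
            // i.e. the usual dual gradient with w^T x_i replaced by a
            // similarity-weighted combination of all task parameter vectors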
            typedef std::map<index_t, float64_t>::const_iterator map_iter;

            float64_t inner_sum = 0;
            for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
            {
                // get data from sparse matrix
                int32_t e_i = it->first;
                float64_t sim = it->second;

                // fetch vector
                float64_t* tmp_w = V.get_column_vector(e_i);
                inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);

                //possibly deal with bias
                //if (prob->use_bias)
                //  G+=w[n];
            }

            // compute gradient
            G = inner_sum-1.0;

            // check if point can be removed from active set
            PG = 0;
            if (alphas[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alphas[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                // save previous alpha
                double alpha_old = alphas[i];

                // project the Newton step onto the feasible set
                alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
                d = (alphas[i] - alpha_old)*yi;

                // update corresponding weight vector
                float64_t* tmp_w = V.get_column_vector(ti);
                prob->x->add_to_dense_vec(d, i, tmp_w, n);

                //if (prob->use_bias)
                //  w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if (gap <= eps)
        {
            // converged on the current active set: stop if it covers all
            // points, otherwise drop shrinking and take another full pass
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    SG_FREE(QD);
    SG_FREE(y);
    SG_FREE(index);
}

float64_t CLibLinearMTL::compute_primal_obj()
{
    /* python prototype
       num_param = param.shape[0]
       num_dim = len(all_xt[0])
       num_tasks = int(num_param / num_dim)
       num_examples = len(all_xt)

       # vector to matrix
       W = param.reshape(num_tasks, num_dim)

       obj = 0

       reg_obj = 0
       loss_obj = 0

       assert len(all_xt) == len(all_lt) == len(task_indicator)

       # L2 regularizer
       for t in xrange(num_tasks):
           reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

       # MTL regularizer
       for s in xrange(num_tasks):
           for t in xrange(num_tasks):
               reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

       # loss
       for i in xrange(num_examples):
           ti = task_indicator[i]
           t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
           # hinge
           loss_obj += max(0, 1 - t)

       # combine to final objective
       obj = reg_obj + C * loss_obj

       return obj
    */
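
    // in the notation of the prototype, the value computed below is
    //
    //   P(W) = 0.5 * sum_t <w_t, w_t>
    //        + 0.5 * sum_{s,t} L[s,t] * <w_s, w_t>
    //        + C * sum_i max(0, 1 - y_i * <w_{t_i}, x_i>)
    //
    // with L the graph Laplacian over tasks and t_i the task of example i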

    SG_INFO("starting to compute primal obj\n");
    // calculate objective value
    SGMatrix<float64_t> W = get_W();

    float64_t obj = 0;
    int32_t num_vec = features->get_num_vectors();
    int32_t w_size = features->get_dim_feature_space();

    // L2 regularizer
    for (int32_t t=0; t<num_tasks; t++)
    {
        float64_t* w_t = W.get_column_vector(t);

        for (int32_t i=0; i<w_size; i++)
        {
            obj += 0.5 * w_t[i]*w_t[i];
        }
    }

    // MTL regularizer
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* w_s = W.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* w_t = W.get_column_vector(t);
            float64_t lap = graph_laplacian.matrix[s*num_tasks+t];

            for (int32_t i=0; i<w_size; i++)
            {
                obj += 0.5 * lap * w_s[i]*w_t[i];
            }
        }
    }

    // loss
    for (int32_t i=0; i<num_vec; i++)
    {
        int32_t ti = task_indicator_lhs[i];
        float64_t* w_t = W.get_column_vector(ti);
        float64_t residual = ((CBinaryLabels*) m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);

        // hinge loss
        obj += C1 * CMath::max(0.0, 1 - residual);
    }

    SG_INFO("done computing primal obj, obj=%f\n", obj);

    return obj;
}

float64_t CLibLinearMTL::compute_dual_obj()
{
    /* python prototype
       num_xt = len(xt)

       # compute quadratic term
       for i in xrange(num_xt):
           for j in xrange(num_xt):
               s = task_indicator[i]
               t = task_indicator[j]
               obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

       return obj
    */
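
    // the loop below avoids the O(num_vec^2) double loop of the prototype by
    // using the per-task sums v_s = sum_{i: t_i = s} alphas[i]*y_i*x_i (the
    // columns of V) to evaluate
    //
    //   D(alpha) = sum_i alphas[i] - 0.5 * sum_{s,t} M[s,t] * <v_s, v_t>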

    SG_INFO("starting to compute DUAL OBJ\n");

    int32_t num_vec=features->get_num_vectors();

    float64_t obj = 0;

    // compute linear term
    for (int32_t i=0; i<num_vec; i++)
    {
        obj += alphas[i];
    }

    // compute quadratic term
    int32_t v_size = features->get_dim_feature_space();

    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* v_s = V.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* v_t = V.get_column_vector(t);
            const float64_t ts = task_similarity_matrix(s, t);

            for (int32_t i=0; i<v_size; i++)
            {
                obj -= 0.5 * ts * v_s[i]*v_t[i];
            }
        }
    }

    /*
    // naive implementation for verification
    float64_t tmp_val2 = 0;

    for (int32_t i=0; i<num_vec; i++)
    {
        int32_t ti_i = task_indicator_lhs[i];
        for (int32_t j=0; j<num_vec; j++)
        {
            // look up task similarity
            int32_t ti_j = task_indicator_lhs[j];

            const float64_t ts = task_similarity_matrix(ti_i, ti_j);

            // compute objective
            tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*) m_labels)->get_label(i) *
                ((CBinaryLabels*) m_labels)->get_label(j) * features->dot(i, features, j);
        }
    }
    */

    return obj;
}

float64_t CLibLinearMTL::compute_duality_gap()
{
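    // note: not implemented yet; a complete version would return the
    // difference compute_primal_obj() - compute_dual_obj()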
    return 0.0;
}

#endif //HAVE_LAPACK