SHOGUN  4.1.0
LibLinearMTL.cpp
Go to the documentation of this file.
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Christian Widmer
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <vector>

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinearMTL.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

// default constructor
CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
    init();
}

// constructor taking the regularization constant, features and labels
CLibLinearMTL::CLibLinearMTL(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
}

void CLibLinearMTL::init()
{
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
}

CLibLinearMTL::~CLibLinearMTL()
{
}

bool CLibLinearMTL::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }
    ASSERT(features)
    m_labels->ensure_valid();

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    float64_t* training_w = NULL;
    if (use_bias)
        training_w=SG_MALLOC(float64_t, num_feat+1);
    else
        training_w=SG_MALLOC(float64_t, num_feat);

    liblinear_problem prob;
    if (use_bias)
    {
        prob.n=num_feat+1;
        memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
    }
    else
    {
        prob.n=num_feat;
        memset(training_w, 0, sizeof(float64_t)*num_feat);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);

    // count the class balance for logging
    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n)
    SG_INFO("%d positives, %d negatives\n", pos, neg)

    double Cp=C1;
    double Cn=C2;

    // note: the solver stores its result in the per-task matrix V;
    // training_w is not written to by it and stays zero
    solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);

    if (use_bias)
        set_bias(training_w[num_feat]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    w = SGVector<float64_t>(num_feat);
    for (int32_t i=0; i<num_feat; i++)
        w[i] = training_w[i];
    SG_FREE(training_w); // was leaked in the original listing

    return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5 (\alpha^T (Q + D) \alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
// where Q_ij = y_i y_j x_i^T x_j and
// D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w (here: the per-task weight matrix V)

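// MTL modification (sketch, inferred from the update loop below): the
// per-task weight vectors v_t = sum_{i: task(i)=t} alpha_i y_i x_i are
// kept as the columns of V, and the dual gradient for example i becomes
//
//   G_i = y_i * sum_s M[task(i), s] * v_s^T x_i - 1
//
// with M the task similarity matrix; alpha_i is then updated by the
// projected Newton step alpha_i <- min(max(alpha_i - G_i/QD_i, 0), C).
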
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    //double *alpha = SG_MALLOC(double, l);

    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;
    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // matrix of per-task weight vectors (one column per task)
    V = SGMatrix<float64_t>(w_size,num_tasks);

    // save alphas
    alphas = SGVector<float64_t>(l);

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    // this implementation always selects the L1-loss (hinge) case
    if(true)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    // set V to zero
    for(int32_t k=0; k<w_size*num_tasks; k++)
    {
        V.matrix[k] = 0;
    }

    // init alphas
    for(i=0; i<l; i++)
    {
        alphas[i] = 0;
    }

    // cache labels as +/-1 and the diagonal QD_i = D_ii + x_i^T x_i
    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x,i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        // randomly permute the active set
        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];
            int32_t ti = task_indicator_lhs[i];
            C = upper_bound[GETI(i)];

            // we compute the inner sum by looping over tasks
            // this update is the main result of MTL_DCD
            typedef std::map<index_t, float64_t>::const_iterator map_iter;

            float64_t inner_sum = 0;
            for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
            {
                // get data from sparse matrix
                int32_t e_i = it->first;
                float64_t sim = it->second;

                // fetch vector
                float64_t* tmp_w = V.get_column_vector(e_i);
                inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);

                //possibly deal with bias
                //if (prob->use_bias)
                //    G+=w[n];
            }

            // compute gradient
            G = inner_sum-1.0;

            // check if point can be removed from active set
            PG = 0;
            if (alphas[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alphas[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                // save previous alpha
                double alpha_old = alphas[i];

                // project onto feasible set
                alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
                d = (alphas[i] - alpha_old)*yi;

                // update corresponding weight vector
                float64_t* tmp_w = V.get_column_vector(ti);
                prob->x->add_to_dense_vec(d, i, tmp_w, n);

                //if (prob->use_bias)
                //    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

        // converged? if we were shrinking, re-check once on the full set
        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n",iter)
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    // SG_MALLOC'd buffers must be released with SG_FREE
    // (the original listing used mismatched delete[])
    SG_FREE(QD);
    //delete [] alpha;
    SG_FREE(y);
    SG_FREE(index);
}

float64_t CLibLinearMTL::compute_primal_obj()
{
    /* python prototype
    num_param = param.shape[0]
    num_dim = len(all_xt[0])
    num_tasks = int(num_param / num_dim)
    num_examples = len(all_xt)

    # vector to matrix
    W = param.reshape(num_tasks, num_dim)

    obj = 0

    reg_obj = 0
    loss_obj = 0

    assert len(all_xt) == len(all_lt) == len(task_indicator)

    # L2 regularizer
    for t in xrange(num_tasks):
        reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

    # MTL regularizer
    for s in xrange(num_tasks):
        for t in xrange(num_tasks):
            reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

    # loss
    for i in xrange(num_examples):
        ti = task_indicator[i]
        t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
        # hinge
        loss_obj += max(0, 1 - t)

    # combine to final objective
    obj = reg_obj + C * loss_obj

    return obj
    */
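    /* In symbols (matching the prototype above): with rows w_t of W,

         P(W) = 0.5*sum_t ||w_t||^2
              + 0.5*sum_{s,t} L[s,t] * w_s^T w_t
              + C * sum_i max(0, 1 - y_i * w_{task(i)}^T x_i)

       where L is the graph Laplacian over tasks. */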

    SG_INFO("starting to compute PRIMAL OBJ\n")

    // calculate objective value
    SGMatrix<float64_t> W = get_W();

    float64_t obj = 0;
    int32_t num_vec = features->get_num_vectors();
    int32_t w_size = features->get_dim_feature_space();

    // L2 regularizer
    for (int32_t t=0; t<num_tasks; t++)
    {
        float64_t* w_t = W.get_column_vector(t);

        for(int32_t i=0; i<w_size; i++)
        {
            obj += 0.5 * w_t[i]*w_t[i];
        }
    }

    // MTL regularizer
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* w_s = W.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* w_t = W.get_column_vector(t);
            float64_t l = graph_laplacian.matrix[s*num_tasks+t];

            for(int32_t i=0; i<w_size; i++)
            {
                obj += 0.5 * l * w_s[i]*w_t[i];
            }
        }
    }

    // loss
    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti = task_indicator_lhs[i];
        float64_t* w_t = W.get_column_vector(ti);
        float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);

        // hinge loss
        obj += C1 * CMath::max(0.0, 1 - residual);
    }

    SG_INFO("DONE to compute PRIMAL OBJ, obj=%f\n",obj)

    return obj;
}

float64_t CLibLinearMTL::compute_dual_obj()
{
    /* python prototype
    num_xt = len(xt)

    # compute quadratic term
    for i in xrange(num_xt):
        for j in xrange(num_xt):

            s = task_indicator[i]
            t = task_indicator[j]

            obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

    return obj
    */
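    /* In symbols (matching the code below): with per-task vectors
       v_t = sum_{i: task(i)=t} alpha_i y_i x_i (the columns of V),

         D(alpha) = sum_i alpha_i - 0.5 * sum_{s,t} M[s,t] * v_s^T v_t,

       which avoids the O(num_vec^2) double loop of the naive version
       kept in comments below. */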

    SG_INFO("starting to compute DUAL OBJ\n")

    int32_t num_vec=features->get_num_vectors();

    float64_t obj = 0;

    // compute linear term
    for(int32_t i=0; i<num_vec; i++)
    {
        obj += alphas[i];
    }

    // compute quadratic term
    int32_t v_size = features->get_dim_feature_space();

    // efficient computation using the per-task vectors V
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* v_s = V.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* v_t = V.get_column_vector(t);
            const float64_t ts = task_similarity_matrix(s, t);

            for(int32_t i=0; i<v_size; i++)
            {
                obj -= 0.5 * ts * v_s[i]*v_t[i];
            }
        }
    }

    /*
    // naive implementation
    float64_t tmp_val2 = 0;

    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti_i = task_indicator_lhs[i];
        for(int32_t j=0; j<num_vec; j++)
        {
            // look up task similarity
            int32_t ti_j = task_indicator_lhs[j];

            const float64_t ts = task_similarity_matrix(ti_i, ti_j);

            // compute objective
            tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
                    ((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features,j);
        }
    }
    */

    return obj;
}

float64_t CLibLinearMTL::compute_duality_gap()
{
    // stub: duality gap computation is not implemented and always reports 0
    return 0.0;
}

#endif //HAVE_LAPACK
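
For context, a minimal usage sketch (not part of the original file): it wires up the task structure this solver reads (task_indicator_lhs, task_similarity_matrix, graph_laplacian, num_tasks). The constructor, set_max_iterations() and compute_primal_obj() appear above; the set_task_*/set_num_tasks/set_graph_laplacian setter names and argument types are assumptions to be checked against LibLinearMTL.h.

#include <shogun/classifier/svm/LibLinearMTL.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

// feats/labels: training data; task_of_example: task id per example;
// similarity/laplacian: task relationship matrices over num_tasks tasks
void train_mtl_sketch(CDenseFeatures<float64_t>* feats, CBinaryLabels* labels,
        SGVector<int32_t> task_of_example, int32_t num_tasks,
        SGSparseMatrix<float64_t> similarity, SGMatrix<float64_t> laplacian)
{
    // C=1.0, as in the two-argument constructor defined in this file
    CLibLinearMTL* mtl = new CLibLinearMTL(1.0, feats, labels);
    mtl->set_max_iterations(1000);

    // task structure (assumed setter names/types -- see LibLinearMTL.h)
    mtl->set_num_tasks(num_tasks);
    mtl->set_task_indicator_lhs(task_of_example);
    mtl->set_task_similarity_matrix(similarity);
    mtl->set_graph_laplacian(laplacian);

    mtl->train();
    SG_SPRINT("primal objective: %f\n", mtl->compute_primal_obj());

    SG_UNREF(mtl);
}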