slep_mc_plain_lr.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Sergey Lisitsyn
 * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
 */

#include <shogun/lib/slep/slep_mc_plain_lr.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/slep/q1/eppMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <iostream>

using namespace shogun;
using namespace Eigen;
using namespace std;

namespace shogun
{

slep_result_t slep_mc_plain_lr(
        CDotFeatures* features,
        CMulticlassLabels* labels,
        float64_t z,
        const slep_options& options)
{
    int i,j;
    // obtain problem parameters
    int n_feats   = features->get_dim_feature_space();
    int n_vecs    = features->get_num_vectors();
    int n_classes = labels->get_num_classes();

    // labels vector containing class indices in [0, n_classes)
    SGVector<float64_t> labels_vector = labels->get_labels();

    // initialize matrices and vectors to be used
    // weight matrix (one column of weights per class)
    MatrixXd w  = MatrixXd::Zero(n_feats, n_classes);
    // intercepts (biases)
    VectorXd c  = VectorXd::Zero(n_classes);

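    // warm start: if a previous result is available, initialize the weights
    // and intercepts from it instead of from zero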
    if (options.last_result)
    {
        SGMatrix<float64_t> last_w = options.last_result->w;
        SGVector<float64_t> last_c = options.last_result->c;
        for (i=0; i<n_classes; i++)
        {
            c[i] = last_c[i];
            for (j=0; j<n_feats; j++)
                w(j,i) = last_w(j,i);
        }
    }
    // previous iterate (wp, cp) and its difference to the current one (wwp, ccp)
    MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
    VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
    // search point weight matrix
    MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
    // search point intercepts
    VectorXd search_c = VectorXd::Zero(n_classes);
    // cached feature dot products: Aw = A*w, As = A*search_w, Awp = A*wp
    MatrixXd Aw  = MatrixXd::Zero(n_vecs, n_classes);
    for (j=0; j<n_classes; j++)
        features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
    MatrixXd As  = MatrixXd::Zero(n_vecs, n_classes);
    MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
    // gradients
    MatrixXd g   = MatrixXd::Zero(n_feats, n_classes);
    VectorXd gc  = VectorXd::Zero(n_classes);
    // gradient-step / projection buffer
    MatrixXd v   = MatrixXd::Zero(n_feats, n_classes);

    // estimate of the Lipschitz constant of the gradient, adapted by the line search
    double L = 1.0/(n_vecs*n_classes);
    // coefficients for search point computation
    double alphap = 0, alpha = 1;
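    // alphap and alpha generate the accelerated-gradient (FISTA-style) momentum:
    // each outer iteration extrapolates the search point with
    // beta = (alphap-1)/alpha and then updates alpha = (1+sqrt(1+4*alpha^2))/2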

    // lambda regularization parameter
    double lambda = z;
    // current and previous objective values
    double objective = 0.0;
    double objective_p = 0.0;

    int iter = 0;
    bool done = false;
    CTime time;
    // disallow Eigen heap allocations inside the main loop (runtime no-malloc check)
    internal::set_is_malloc_allowed(false);
    while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
    {
        double beta = (alphap-1)/alpha;
        // compute search points (momentum extrapolation)
        search_w = w + beta*wwp;
        search_c = c + beta*ccp;

        // update dot products with search point
        As = Aw + beta*(Aw-Awp);
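        // (since search_w = w + beta*(w - wp), the cached products Aw = A*w and
        // Awp = A*wp yield A*search_w by the same extrapolation, avoiding an
        // extra pass over the features)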

        // compute objective and gradient at search point
        double fun_s = 0;
        g.setZero();
        gc.setZero();
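        // The multiclass problem is treated as n_classes one-vs-rest logistic
        // regressions: for vector i and class j the signed margin is
        //   aa = s_ij*(A*search_w(i,j) + search_c(j)), s_ij = -1 if i belongs to class j, +1 otherwise,
        // the loss is log(1+exp(aa)), evaluated stably as
        //   log(exp(-bb) + exp(aa-bb)) + bb  with  bb = max(aa, 0),
        // and its derivative s_ij*(1 - 1/(1+exp(aa))) is accumulated into the
        // gradients g (weights) and gc (intercepts).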
        // for each vector
        for (i=0; i<n_vecs; i++)
        {
            // class of current vector
            int vec_class = labels_vector[i];
            // for each class
            for (j=0; j<n_classes; j++)
            {
                // compute logistic loss
                double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
                double bb = aa > 0.0 ? aa : 0.0;
                // evaluate log(1+exp(aa)) stably (log-sum-exp trick)
                fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
                double prob = 1.0/(1+CMath::exp(aa));
                double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);
                // update gradient of intercepts
                gc[j] += b;
                // update gradient of weight vectors
                features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
            }
        }
        //fun_s /= (n_vecs*n_classes);

        // remember the current iterate before the proximal-gradient update
        wp = w;
        Awp = Aw;
        cp = c;

        int inner_iter = 0;
        double fun_x = 0;

        // line search process
        while (inner_iter<5000)
        {
            // gradient step from the search point with step size 1/L
            v = search_w - g/L;
            c = search_c - gc/L;

            // proximal/projection step of the regularizer (threshold lambda/L, norm parameter q)
            eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);
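            // (only the weights are passed through the projection; the
            // intercepts c are not regularized, so they keep the plain
            // gradient step computed above)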

            // v now holds the step taken from the search point
            v = w - search_w;

            // update cached dot products for the new w
            for (j=0; j<n_classes; j++)
                features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);

            // compute the loss at the new point w
            fun_x = 0;
            for (i=0; i<n_vecs; i++)
            {
                int vec_class = labels_vector[i];
                for (j=0; j<n_classes; j++)
                {
                    double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
                    double bb = aa > 0.0 ? aa : 0.0;
                    fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
                }
            }
            //fun_x /= (n_vecs*n_classes);

            // check for termination of line search
            double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
            double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);
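            // The step is accepted when the quadratic upper bound
            //   f(x) <= f(s) + <grad f(s), x - s> + (L/2)*||x - s||^2
            // holds, i.e. l_sum <= L*r_sum; otherwise L is increased and the
            // gradient/proximal step is recomputed.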

            // stop if the step from the search point is negligible
            if (r_sum <= 1e-20)
            {
                SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum);
                done = true;
                break;
            }

            if (l_sum <= r_sum*L)
                break;
            else
                L = CMath::max(2*L, l_sum/r_sum);

            inner_iter++;
        }

        // update momentum coefficients (accelerated-gradient sequence)
        alphap = alpha;
        alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;

        // update iterate differences wwp and ccp
        wwp = w - wp;
        ccp = c - cp;

        // update objectives
        objective_p = objective;
        objective = fun_x;

        // add the group-norm regularizer (sum of per-class column norms)
        double L1q_norm = 0.0;
        for (int m=0; m<n_classes; m++)
            L1q_norm += w.col(m).norm();
        objective += lambda*L1q_norm;

        //cout << "Objective = " << objective << endl;

        // check for termination of whole process
        if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
        {
            SG_SINFO("Objective changes less than tolerance\n");
            done = true;
        }

        iter++;
    }
    SG_SINFO("%d iterations passed, objective = %f\n",iter,objective);
    internal::set_is_malloc_allowed(true);

    // output computed weight vectors and intercepts
    SGMatrix<float64_t> r_w(n_feats,n_classes);
    for (j=0; j<n_classes; j++)
    {
        for (i=0; i<n_feats; i++)
            r_w(i,j) = w(i,j);
    }
    //r_w.display_matrix();
    SGVector<float64_t> r_c(n_classes);
    for (j=0; j<n_classes; j++)
        r_c[j] = c[j];
    return slep_result_t(r_w, r_c);
}
}
#endif
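
A minimal sketch of how this solver might be driven from user code is given below. It relies only on the signature and the slep_options fields used in this file (q, max_iter, tolerance, last_result); the dense feature and label classes, the header paths not included above, and the default construction of slep_options are assumptions to be checked against the actual Shogun headers.

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/slep/slep_mc_plain_lr.h>
#include <shogun/lib/slep/slep_options.h>

using namespace shogun;

// Hypothetical driver: feats holds n_feats x n_vecs dense vectors and labs
// holds class indices in [0, n_classes); both are assumed to be set up elsewhere.
void run_slep_mc_plain_lr(CDenseFeatures<float64_t>* feats, CMulticlassLabels* labs)
{
    slep_options options;         // assumption: default construction gives usable defaults
    options.q = 2.0;              // Lq norm parameter forwarded to eppMatrix
    options.max_iter = 1000;      // cap on outer iterations
    options.tolerance = 1e-6;     // relative objective-change tolerance
    options.last_result = NULL;   // no warm start

    float64_t lambda = 0.1;       // regularization strength (the z argument)
    slep_result_t result = slep_mc_plain_lr(feats, labs, lambda, options);

    // result.w is n_feats x n_classes, result.c holds the per-class intercepts
    SGMatrix<float64_t> learned_w = result.w;
    SGVector<float64_t> learned_c = result.c;
}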