malsar_low_rank.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Sergey Lisitsyn
 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
 */

#include <shogun/lib/malsar/malsar_low_rank.h>
#ifdef HAVE_EIGEN3
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/Math.h>

using namespace Eigen;

namespace shogun
{

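// Solves the MALSAR low-rank multi-task learning problem: the sum of per-task
// averaged logistic losses plus rho times the trace (nuclear) norm of the
// stacked weight matrix. Minimization uses an accelerated proximal gradient
// (FISTA-style) scheme with an Armijo-Goldstein line search; the code is a
// port of the MALSAR package of Zhou and Ye.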
malsar_result_t malsar_low_rank(
        CDotFeatures* features,
        double* y,
        double rho,
        const malsar_options& options)
{
    int task;
    int n_feats = features->get_dim_feature_space();
    SG_SDEBUG("n feats = %d\n", n_feats);
    int n_vecs = features->get_num_vectors();
    SG_SDEBUG("n vecs = %d\n", n_vecs);
    int n_tasks = options.n_tasks;
    SG_SDEBUG("n tasks = %d\n", n_tasks);

    int iter = 0;

    // initialize weight vector and bias for each task
    MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
    VectorXd Cs = VectorXd::Zero(n_tasks);
    MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
    VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;

    double t=1, t_old=0;
    double gamma=1, gamma_inc=2;
    double obj=0.0, obj_old=0.0;

    double rho_L2 = 0.0;
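    // gamma plays the role of the inverse step size and is grown geometrically
    // by the line search below; rho_L2 would add an optional ridge (L2) term
    // to the smooth part and is fixed at zero here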

    internal::set_is_malloc_allowed(false);
    bool done = false;
    while (!done && iter <= options.max_iter)
    {
        double alpha = double(t_old - 1)/t;

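        // FISTA extrapolation: with momentum weight alpha = (t_old-1)/t, the
        // search point is Wz + alpha*(Wz - Wz_old), written below in the
        // equivalent form (1+alpha)*Wz - alpha*Wz_old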
        // compute search point
        Ws = (1+alpha)*Wz - alpha*Wz_old;
        Cs = (1+alpha)*Cz - alpha*Cz_old;

        // zero gradient
        gWs.setZero();
        gCs.setZero();

        // compute gradient and objective at search point
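        // per task, the smooth part is the averaged logistic loss
        //   (1/n_t) * sum_i log(1 + exp(-y_i*(w_t . x_i + c_t)));
        // below, aa = -y*f(x) and the per-sample gradient weight is
        // b = -y*sigmoid(aa)/n_t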
        double Fs = 0;
        for (task=0; task<n_tasks; task++)
        {
            SGVector<index_t> task_idx = options.tasks_indices[task];
            int n_task_vecs = task_idx.vlen;
            for (int i=0; i<n_task_vecs; i++)
            {
                double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
                double bb = CMath::max(aa,0.0);

                // overflow-safe evaluation of log(1 + exp(aa))
                Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
                double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;

                gCs[task] += b;
                features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
            }
        }
        gWs.noalias() += 2*rho_L2*Ws;
        //SG_SDEBUG("gWs=%f\n",gWs.squaredNorm());

        // add regularizer
        Fs += rho_L2*Ws.squaredNorm();

        double Fzp = 0.0;

        int inner_iter = 0;
        // line search, Armijo-Goldstein scheme
        while (inner_iter <= 1000)
        {
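            // The proximal operator of the trace norm soft-thresholds the
            // singular values: each one is reduced by rho/gamma and values
            // that become non-positive are dropped, which is what pushes the
            // stacked weight matrix towards low rank.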
            // proximal step: soft-threshold the singular values of
            // Ws - gWs/gamma at the level rho/gamma
            internal::set_is_malloc_allowed(true);
            Wzp.setZero();
            JacobiSVD<MatrixXd> svd((Ws - gWs/gamma).transpose(),ComputeThinU | ComputeThinV);
            for (int i=0; i<svd.singularValues().size(); i++)
            {
                double sv = svd.singularValues()[i] - rho/gamma;
                if (sv > 0.0)
                    Wzp += (sv*svd.matrixU().col(i)*
                           svd.matrixV().col(i).transpose()).transpose();
            }
            internal::set_is_malloc_allowed(false);
            // gradient step on the biases (the trace norm does not touch them)
            Czp = Cs - gCs/gamma;

            // compute objective at line search point
            Fzp = 0.0;
            for (task=0; task<n_tasks; task++)
            {
                SGVector<index_t> task_idx = options.tasks_indices[task];
                int n_task_vecs = task_idx.vlen;
                for (int i=0; i<n_task_vecs; i++)
                {
                    double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
                    double bb = CMath::max(aa,0.0);

                    Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
                }
            }
            Fzp += rho_L2*Wzp.squaredNorm();

            // compute delta between line search point and search point
            delta_Wzp = Wzp - Ws;
            delta_Czp = Czp - Cs;

            // squared norms of delta
            double nrm_delta_Wzp = delta_Wzp.squaredNorm();
            double nrm_delta_Czp = delta_Czp.squaredNorm();

            double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;

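            // Fzp_gamma is the quadratic upper model of the smooth part at
            // the search point:
            //   Fs + <gWs, dW> + <gCs, dC> + (gamma/2)*(||dW||^2 + ||dC||^2)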
            double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
                (delta_Czp.transpose()*gCs).trace() +
                (gamma/2)*nrm_delta_Wzp +
                (gamma/2)*nrm_delta_Czp;

            // break if delta is getting too small
            if (r_sum <= 1e-20)
            {
                done = true;
                break;
            }

            // accept the step once the objective at the line search point
            // drops below the quadratic model; otherwise backtrack
            if (Fzp <= Fzp_gamma)
                break;
            else
            {
                gamma *= gamma_inc;
                inner_iter++; // count backtracks so the 1000-step cap takes effect
            }
        }

        Wz_old = Wz;
        Cz_old = Cz;
        Wz = Wzp;
        Cz = Czp;

        // compute objective value
        obj_old = obj;
        obj = Fzp;
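        // full objective: smooth losses (plus optional ridge) + rho*||Wzp||_*,
        // the trace norm being the sum of singular values of Wzp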
        internal::set_is_malloc_allowed(true);
        // compute singular values only (no U or V factors are needed here;
        // EigenvaluesOnly is a SelfAdjointEigenSolver flag, not a SVD option)
        JacobiSVD<MatrixXd> svd(Wzp);
        obj += rho*svd.singularValues().sum();
        internal::set_is_malloc_allowed(false);

        // check if the process should be terminated
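        //   0: absolute decrease of the objective below tolerance
        //   1: decrease relative to the previous objective below tolerance
        //   2: absolute value of the objective below tolerance
        //   3: iteration budget exhausted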
        switch (options.termination)
        {
            case 0:
                if (iter>=2)
                {
                    if ( CMath::abs(obj-obj_old) <= options.tolerance )
                        done = true;
                }
                break;
            case 1:
                if (iter>=2)
                {
                    if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
                        done = true;
                }
                break;
            case 2:
                if (CMath::abs(obj) <= options.tolerance)
                    done = true;
                break;
            case 3:
                if (iter>=options.max_iter)
                    done = true;
                break;
        }

        iter++;
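        // FISTA momentum update: t_{k+1} = (1 + sqrt(1 + 4*t_k^2))/2, so that
        // alpha = (t_old - 1)/t at the top of the loop is the usual
        // acceleration weight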
        t_old = t;
        t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
    }
    internal::set_is_malloc_allowed(true);
    SG_SDEBUG("%d iterations passed, objective = %f\n",iter,obj);

    SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
    for (int i=0; i<n_feats; i++)
    {
        for (task=0; task<n_tasks; task++)
            tasks_w(i,task) = Wzp(i,task);
    }
    SGVector<float64_t> tasks_c(n_tasks);
    for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
    return malsar_result_t(tasks_w, tasks_c);
}
}
#endif
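
A minimal usage sketch (hypothetical, not part of the Shogun sources): it builds toy features with CDenseFeatures<float64_t>, which implements the CDotFeatures interface used above, and sets only the malsar_options fields this translation unit actually reads (n_tasks, tasks_indices, max_iter, tolerance, termination); the exact option and result layouts should be checked against malsar_options.h.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/malsar/malsar_low_rank.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy problem: two tasks, three examples each, five features
    const int n_feats = 5, n_vecs = 6, n_tasks = 2;
    SGMatrix<float64_t> X(n_feats, n_vecs);
    X.zero();
    double labels[n_vecs] = {1, -1, 1, -1, 1, -1}; // +1/-1 per example

    // examples 0..2 belong to task 0, examples 3..5 to task 1
    SGVector<index_t> task_indices[n_tasks];
    task_indices[0] = SGVector<index_t>(3);
    task_indices[1] = SGVector<index_t>(3);
    for (int i=0; i<3; i++)
    {
        task_indices[0][i] = i;
        task_indices[1][i] = 3+i;
    }

    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
    SG_REF(features);

    malsar_options opts;               // assumed default-constructible
    opts.n_tasks = n_tasks;
    opts.tasks_indices = task_indices; // one SGVector<index_t> of example ids per task
    opts.max_iter = 1000;
    opts.tolerance = 1e-4;
    opts.termination = 1;              // stop on relative change of the objective

    // rho weighs the trace-norm (low-rank) regularizer
    malsar_result_t result = malsar_low_rank(features, labels, 0.1, opts);

    SG_UNREF(features);
    exit_shogun();
    return 0;
}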