SHOGUN: malsar_joint_feature_learning.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Sergey Lisitsyn
00008  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
00009  */
00010 
00011 #include <shogun/lib/malsar/malsar_joint_feature_learning.h>
00012 #ifdef HAVE_EIGEN3
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/mathematics/eigen3.h>
00016 #include <iostream>
00017 
00018 using namespace Eigen;
00019 using namespace std;
00020 
00021 namespace shogun
00022 {
00023 
00024 malsar_result_t malsar_joint_feature_learning(
00025         CDotFeatures* features,
00026         double* y,
00027         double rho1,
00028         double rho2,
00029         const malsar_options& options)
00030 {
00031     int task;
00032     int n_feats = features->get_dim_feature_space();
00033     SG_SDEBUG("n feats = %d\n", n_feats);
00034     int n_vecs = features->get_num_vectors();
00035     SG_SDEBUG("n vecs = %d\n", n_vecs);
00036     int n_tasks = options.n_tasks;
00037     SG_SDEBUG("n tasks = %d\n", n_tasks);
00038 
00039     int iter = 0;
00040 
00041     // initialize weight vector and bias for each task
00042     MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
00043     VectorXd Cs = VectorXd::Zero(n_tasks);
00044     MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
00045     VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
00046 
00047     double t=1, t_old=0;
00048     double gamma=1, gamma_inc=2;
00049     double obj=0.0, obj_old=0.0;
00050 
00051     internal::set_is_malloc_allowed(false);
00052     bool done = false;
00053     while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
00054     {
00055         double alpha = double(t_old - 1)/t;
00056 
00057         // compute search point
00058         Ws = (1+alpha)*Wz - alpha*Wz_old;
00059         Cs = (1+alpha)*Cz - alpha*Cz_old;
00060 
00061         // zero gradient
00062         gWs.setZero();
00063         gCs.setZero();
00064 
00065         // compute gradient and objective at search point
00066         double Fs = 0;
00067         for (task=0; task<n_tasks; task++)
00068         {
00069             SGVector<index_t> task_idx = options.tasks_indices[task];
00070             int n_task_vecs = task_idx.vlen;
00071             for (int i=0; i<n_task_vecs; i++)
00072             {
00073                 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
00074                 double bb = CMath::max(aa,0.0);
00075 
00076                 // avoid underflow when computing exponential loss
00077                 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
00078                 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;
00079 
00080                 gCs[task] += b;
00081                 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
00082             }
00083         }
00084         gWs.noalias() += 2*rho2*Ws;
00085         
00086         // add regularizer
00087         Fs += Ws.squaredNorm();
00088 
00089         //cout << "gWs" << endl << gWs << endl;
00090         //cout << "gCs" << endl << gCs << endl;
00091         //SG_SPRINT("Fs = %f\n",Fs);
00092 
00093         double Fzp = 0.0;
00094 
00095         int inner_iter = 0;
00096         // line search, Armijo-Goldstein scheme
00097         while (inner_iter <= 1000)
00098         {
00099             // compute lasso projection of Ws - gWs/gamma
00100             for (int i=0; i<n_feats; i++)
00101             {
00102                 Wzp.row(i).noalias() = Ws.row(i) - gWs.row(i)/gamma;
00103                 double norm = Wzp.row(i).lpNorm<2>();
00104                 if (norm == 0.0)
00105                     Wzp.row(i).setZero();
00106                 else
00107                 {
00108                     double threshold = norm - rho1/gamma;
00109                     if (threshold < 0.0)
00110                         Wzp.row(i).setZero();
00111                     else
00112                         Wzp.row(i) *= threshold/norm;
00113                 }
00114             }
00115             // walk in direction of antigradient 
00116             Czp = Cs - gCs/gamma;
00117             
00118             // compute objective at line search point
00119             Fzp = 0.0;
00120             for (task=0; task<n_tasks; task++)
00121             {
00122                 SGVector<index_t> task_idx = options.tasks_indices[task];
00123                 int n_task_vecs = task_idx.vlen;
00124                 for (int i=0; i<n_task_vecs; i++)
00125                 {
00126                     double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
00127                     double bb = CMath::max(aa,0.0);
00128 
00129                     Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
00130                 }
00131             }
00132             Fzp += rho2*Wzp.squaredNorm();
00133 
00134             // compute delta between line search point and search point
00135             delta_Wzp = Wzp - Ws;
00136             delta_Czp = Czp - Cs;
00137 
00138             // norms of delta
00139             double nrm_delta_Wzp = delta_Wzp.squaredNorm();
00140             double nrm_delta_Czp = delta_Czp.squaredNorm();
00141 
00142             double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;
00143 
00144             double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
00145                 (delta_Czp.transpose()*gCs).trace() +
00146                 (gamma/2)*nrm_delta_Wzp +
00147                 (gamma/2)*nrm_delta_Czp;
00148 
00149             // break if delta is getting too small
00150             if (r_sum <= 1e-20)
00151             {
00152                 SG_SDEBUG("Line search point is too close to search point\n");
00153                 done = true;
00154                 break;
00155             }
00156 
00157             // break if objective at line searc point is smaller than Fzp_gamma
00158             if (Fzp <= Fzp_gamma)
00159                 break;
00160             else
00161                 gamma *= gamma_inc;
00162 
00163             inner_iter++;
00164         }
00165 
00166         Wz_old = Wz;
00167         Cz_old = Cz;
00168         Wz = Wzp;
00169         Cz = Czp;
00170 
00171         // compute objective value
00172         obj_old = obj;
00173         obj = Fzp;
00174         for (int i=0; i<n_feats; i++)
00175             obj += rho1*(Wz.row(i).lpNorm<2>());
00176         //for (task=0; task<n_tasks; task++)
00177         //  obj += rho1*(Wz.col(task).norm());
00178         SG_SDEBUG("Obj = %f\n",obj);
00179         //SG_SABS_PROGRESS(obj,0.0);
00180         // check if process should be terminated 
00181         switch (options.termination)
00182         {
00183             case 0:
00184                 if (iter>=2)
00185                 {
00186                     if ( CMath::abs(obj-obj_old) <= options.tolerance )
00187                     {
00188                         SG_SDEBUG("Objective changes less than tolerance\n");
00189                         done = true;
00190                     }
00191                 }
00192             break;
00193             case 1:
00194                 if (iter>=2)
00195                 {
00196                     if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
00197                         done = true;
00198                 }
00199             break;
00200             case 2:
00201                 if (CMath::abs(obj) <= options.tolerance)
00202                     done = true;
00203             break;
00204             case 3:
00205                 if (iter>=options.max_iter)
00206                     done = true;
00207             break;
00208         }
00209 
00210         iter++;
00211         t_old = t;
00212         t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
00213     }
00214     internal::set_is_malloc_allowed(true);
00215     SG_SDONE();
00216     SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);
00217 
00218     SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
00219     for (int i=0; i<n_feats; i++)
00220     {
00221         for (task=0; task<n_tasks; task++)
00222             tasks_w(i,task) = Wzp(i,task);
00223     }
00224     SGVector<float64_t> tasks_c(n_tasks);
00225     for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
00226     return malsar_result_t(tasks_w, tasks_c);
00227 };
00228 };
00229 #endif