SHOGUN: malsar_clustered.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Sergey Lisitsyn
00008  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
00009  */
00010 
00011 #include <shogun/lib/malsar/malsar_clustered.h>
00012 #ifdef HAVE_EIGEN3
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/mathematics/eigen3.h>
00015 #include <iostream>
00016 #include <shogun/lib/external/libqp.h>
00017 
00018 using namespace Eigen;
00019 using namespace std;
00020 
00021 namespace shogun
00022 {
00023 
00024 static double* H_diag_matrix;
00025 static int H_diag_matrix_ld;
00026 
00027 static const double* get_col(uint32_t j)
00028 {
00029     return H_diag_matrix + j*H_diag_matrix_ld;
00030 }
00031 
00032 malsar_result_t malsar_clustered(
00033         CDotFeatures* features,
00034         double* y,
00035         double rho1,
00036         double rho2,
00037         const malsar_options& options)
00038 {
00039     int task;
00040     int n_feats = features->get_dim_feature_space();
00041     SG_SDEBUG("n feats = %d\n", n_feats);
00042     int n_vecs = features->get_num_vectors();
00043     SG_SDEBUG("n vecs = %d\n", n_vecs);
00044     int n_tasks = options.n_tasks;
00045     SG_SDEBUG("n tasks = %d\n", n_tasks);
00046 
00047     H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks);
00048     for (int i=0; i<n_tasks; i++)
00049         H_diag_matrix[i*n_tasks+i] = 2.0;
00050     H_diag_matrix_ld = n_tasks;
00051 
00052     int iter = 0;
00053 
00054     // initialize weight vector and bias for each task
00055     MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
00056     VectorXd Cs = VectorXd::Zero(n_tasks);
00057     MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks;
00058     MatrixXd IM = Ms;
00059     MatrixXd IMsqinv = Ms;
00060     MatrixXd invEtaMWt = Ms;
00061 
00062     MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
00063     VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
00064     MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms;
00065     MatrixXd Mzp_Pz;
00066 
00067     double eta = rho2/rho1;
00068     double c = rho1*eta*(1+eta);
00069 
00070     double t=1, t_old=0;
00071     double gamma=1, gamma_inc=2;
00072     double obj=0.0, obj_old=0.0;
00073             
00074     double* diag_H = SG_MALLOC(double, n_tasks);
00075     double* f = SG_MALLOC(double, n_tasks);
00076     double* a = SG_MALLOC(double, n_tasks);
00077     double* lb = SG_MALLOC(double, n_tasks);
00078     double* ub = SG_MALLOC(double, n_tasks);
00079     double* x = SG_CALLOC(double, n_tasks);
00080 
00081     internal::set_is_malloc_allowed(false);
00082     bool done = false;
00083     while (!done && iter <= options.max_iter)
00084     {
00085         double alpha = double(t_old - 1)/t;
00086         SG_SDEBUG("alpha=%f\n",alpha);
00087 
00088         // compute search point
00089         Ws = (1+alpha)*Wz - alpha*Wz_old;
00090         Cs = (1+alpha)*Cz - alpha*Cz_old;
00091         Ms = (1+alpha)*Mz - alpha*Mz_old;
00092 
00093         // zero gradient
00094         gWs.setZero();
00095         gCs.setZero();
00096         internal::set_is_malloc_allowed(true);
00097         SG_SDEBUG("Computing gradient\n");
00098         IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms);
00099         // cout << "M" << endl << Ms << endl;
00100         // cout << "IM" << endl << IM << endl;
00101         IMsqinv = (IM*IM).inverse();
00102         invEtaMWt = IM.inverse()*Ws.transpose();
00103         //cout << "invEtaMWt" << endl << invEtaMWt << endl;
00104         gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv;
00105         gWs.noalias() += 2*c*invEtaMWt.transpose();
00106         internal::set_is_malloc_allowed(false);
00107 
00108         // compute gradient and objective at search point
00109         double Fs = 0;
00110         for (task=0; task<n_tasks; task++)
00111         {
00112             SGVector<index_t> task_idx = options.tasks_indices[task];
00113             int n_vecs_task = task_idx.vlen;
00114             for (int i=0; i<n_vecs_task; i++)
00115             {
00116                 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
00117                 double bb = CMath::max(aa,0.0);
00118 
00119                 // avoid underflow when computing exponential loss
00120                 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
00121                 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task;
00122                 gCs[task] += b;
00123                 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
00124             }
00125         }
00126         //cout << "gWs" << endl << gWs << endl;
00127         //cout << "gCs" << endl << gCs << endl;
00128         SG_SDEBUG("Computed gradient\n");
00129         
00130         // add regularizer
00131         Fs += c*(Ws*invEtaMWt).trace();
00132         SG_SDEBUG("Fs = %f \n", Fs);
00133 
00134         double Fzp = 0.0;
00135 
00136         int inner_iter = 0;
00137         // line search, Armijo-Goldstein scheme
00138         while (inner_iter <= 1000)
00139         {
00140             Wzp = Ws - gWs/gamma;
00141             Czp = Cs - gCs/gamma;
00142             // compute singular projection of Ms - gMs/gamma with k
00143             internal::set_is_malloc_allowed(true);
00144             EigenSolver<MatrixXd> eigensolver(Ms-gMs/gamma);
00145 
00146             // solve problem
00147             // min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1
00148             for (int i=0; i<n_tasks; i++) 
00149             {
00150                 diag_H[i] = 2.0;
00151                 f[i] = -2*eigensolver.eigenvalues()[i].real();
00152                 SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real());
00153                 a[i] = 1.0;
00154                 lb[i] = 0.0;
00155                 ub[i] = 1.0;
00156                 x[i] = double(options.n_clusters)/n_tasks;
00157             }
00158             double b = options.n_clusters;//eigensolver.eigenvalues().sum().real();
00159             SG_SDEBUG("b = %f\n", b);
00160             SG_SDEBUG("Calling libqp\n");
00161             libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL);
00162             SG_SDEBUG("Libqp objective = %f\n",problem_state.QP);
00163             SG_SDEBUG("Exit code = %d\n",problem_state.exitflag);
00164             SG_SDEBUG("%d iteration passed\n",problem_state.nIter);
00165             SG_SDEBUG("Solution is \n [ ");
00166             for (int i=0; i<n_tasks; i++)
00167                 SG_SDEBUG("%f ", x[i]);
00168             SG_SDEBUG("]\n");
00169             Map<VectorXd> Mzp_DiagSigz(x,n_tasks);
00170             Mzp_Pz = eigensolver.eigenvectors().real();
00171             Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose();
00172             internal::set_is_malloc_allowed(false);
00173             // walk in direction of antigradient 
00174             for (int i=0; i<n_tasks; i++)
00175                 Mzp_DiagSigz[i] += eta;
00176             internal::set_is_malloc_allowed(true);
00177             invEtaMWt = (Mzp_Pz*
00178                          (Mzp_DiagSigz.cwiseInverse().asDiagonal())*
00179                          Mzp_Pz.transpose())*
00180                          Wzp.transpose();
00181             internal::set_is_malloc_allowed(false);
00182             // compute objective at line search point
00183             Fzp = 0.0;
00184             for (task=0; task<n_tasks; task++)
00185             {
00186                 SGVector<index_t> task_idx = options.tasks_indices[task];
00187                 int n_vecs_task = task_idx.vlen;
00188                 for (int i=0; i<n_vecs_task; i++)
00189                 {
00190                     double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
00191                     double bb = CMath::max(aa,0.0);
00192 
00193                     Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
00194                 }
00195             }
00196             Fzp += c*(Wzp*invEtaMWt).trace();
00197 
00198             // compute delta between line search point and search point
00199             delta_Wzp = Wzp - Ws;
00200             delta_Czp = Czp - Cs;
00201             delta_Mzp = Mzp - Ms;
00202 
00203             // norms of delta
00204             double nrm_delta_Wzp = delta_Wzp.squaredNorm();
00205             double nrm_delta_Czp = delta_Czp.squaredNorm();
00206             double nrm_delta_Mzp = delta_Mzp.squaredNorm();
00207 
00208             double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3;
00209 
00210             double Fzp_gamma = 0.0;
00211             if (n_feats > n_tasks)
00212             {
00213                 Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
00214                                  (delta_Czp.transpose()*gCs).trace() +
00215                                  (delta_Mzp.transpose()*gMs).trace() +
00216                                  (gamma/2)*nrm_delta_Wzp +
00217                                  (gamma/2)*nrm_delta_Czp +
00218                                  (gamma/2)*nrm_delta_Mzp;
00219             }
00220             else
00221             {
00222                 Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() +
00223                                  (gCs.transpose()*delta_Czp).trace() +
00224                                  (gMs.transpose()*delta_Mzp).trace() +
00225                                  (gamma/2)*nrm_delta_Wzp +
00226                                  (gamma/2)*nrm_delta_Czp +
00227                                  (gamma/2)*nrm_delta_Mzp;
00228             }
00229 
00230             // break if delta is getting too small
00231             if (r_sum <= 1e-20)
00232             {
00233                 done = true;
00234                 break;
00235             }
00236 
00237             // break if objective at line searc point is smaller than Fzp_gamma
00238             if (Fzp <= Fzp_gamma)
00239                 break;
00240             else
00241                 gamma *= gamma_inc;
00242 
00243             inner_iter++;
00244         }
00245 
00246         Wz_old = Wz;
00247         Cz_old = Cz;
00248         Mz_old = Mz;
00249         Wz = Wzp;
00250         Cz = Czp;
00251         Mz = Mzp;
00252 
00253         // compute objective value
00254         obj_old = obj;
00255         obj = Fzp;
00256 
00257         // check if process should be terminated 
00258         switch (options.termination)
00259         {
00260             case 0:
00261                 if (iter>=2)
00262                 {
00263                     if ( CMath::abs(obj-obj_old) <= options.tolerance )
00264                         done = true;
00265                 }
00266             break;
00267             case 1:
00268                 if (iter>=2)
00269                 {
00270                     if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
00271                         done = true;
00272                 }
00273             break;
00274             case 2:
00275                 if (CMath::abs(obj) <= options.tolerance)
00276                     done = true;
00277             break;
00278             case 3:
00279                 if (iter>=options.max_iter)
00280                     done = true;
00281             break;
00282         }
00283 
00284         iter++;
00285         t_old = t;
00286         t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
00287     }
00288     internal::set_is_malloc_allowed(true);
00289     SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);
00290     
00291     SG_FREE(H_diag_matrix);
00292     SG_FREE(diag_H);
00293     SG_FREE(f);
00294     SG_FREE(a);
00295     SG_FREE(lb);
00296     SG_FREE(ub);
00297     SG_FREE(x);
00298 
00299     SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
00300     for (int i=0; i<n_feats; i++)
00301     {
00302         for (task=0; task<n_tasks; task++)
00303             tasks_w(i,task) = Wzp(i,task);
00304     }
00305     //tasks_w.display_matrix();
00306     SGVector<float64_t> tasks_c(n_tasks);
00307     for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
00308     return malsar_result_t(tasks_w, tasks_c);
00309 };
00310 };
00311 #endif