/*
 * Shogun MALSAR joint feature learning solver.
 * (Original license header lost in Doxygen export; see the shogun
 * repository for the authoritative copyright notice.)
 */
00011 #include <shogun/lib/malsar/malsar_joint_feature_learning.h>
00012 #ifdef HAVE_EIGEN3
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/mathematics/eigen3.h>
00016 #include <iostream>
00017
00018 using namespace Eigen;
00019 using namespace std;
00020
00021 namespace shogun
00022 {
00023
00024 malsar_result_t malsar_joint_feature_learning(
00025 CDotFeatures* features,
00026 double* y,
00027 double rho1,
00028 double rho2,
00029 const malsar_options& options)
00030 {
00031 int task;
00032 int n_feats = features->get_dim_feature_space();
00033 SG_SDEBUG("n feats = %d\n", n_feats);
00034 int n_vecs = features->get_num_vectors();
00035 SG_SDEBUG("n vecs = %d\n", n_vecs);
00036 int n_tasks = options.n_tasks;
00037 SG_SDEBUG("n tasks = %d\n", n_tasks);
00038
00039 int iter = 0;
00040
00041
00042 MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
00043 VectorXd Cs = VectorXd::Zero(n_tasks);
00044 MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
00045 VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
00046
00047 double t=1, t_old=0;
00048 double gamma=1, gamma_inc=2;
00049 double obj=0.0, obj_old=0.0;
00050
00051 internal::set_is_malloc_allowed(false);
00052 bool done = false;
00053 while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
00054 {
00055 double alpha = double(t_old - 1)/t;
00056
00057
00058 Ws = (1+alpha)*Wz - alpha*Wz_old;
00059 Cs = (1+alpha)*Cz - alpha*Cz_old;
00060
00061
00062 gWs.setZero();
00063 gCs.setZero();
00064
00065
00066 double Fs = 0;
00067 for (task=0; task<n_tasks; task++)
00068 {
00069 SGVector<index_t> task_idx = options.tasks_indices[task];
00070 int n_task_vecs = task_idx.vlen;
00071 for (int i=0; i<n_task_vecs; i++)
00072 {
00073 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
00074 double bb = CMath::max(aa,0.0);
00075
00076
00077 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
00078 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;
00079
00080 gCs[task] += b;
00081 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
00082 }
00083 }
00084 gWs.noalias() += 2*rho2*Ws;
00085
00086
00087 Fs += Ws.squaredNorm();
00088
00089
00090
00091
00092
00093 double Fzp = 0.0;
00094
00095 int inner_iter = 0;
00096
00097 while (inner_iter <= 1000)
00098 {
00099
00100 for (int i=0; i<n_feats; i++)
00101 {
00102 Wzp.row(i).noalias() = Ws.row(i) - gWs.row(i)/gamma;
00103 double norm = Wzp.row(i).lpNorm<2>();
00104 if (norm == 0.0)
00105 Wzp.row(i).setZero();
00106 else
00107 {
00108 double threshold = norm - rho1/gamma;
00109 if (threshold < 0.0)
00110 Wzp.row(i).setZero();
00111 else
00112 Wzp.row(i) *= threshold/norm;
00113 }
00114 }
00115
00116 Czp = Cs - gCs/gamma;
00117
00118
00119 Fzp = 0.0;
00120 for (task=0; task<n_tasks; task++)
00121 {
00122 SGVector<index_t> task_idx = options.tasks_indices[task];
00123 int n_task_vecs = task_idx.vlen;
00124 for (int i=0; i<n_task_vecs; i++)
00125 {
00126 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
00127 double bb = CMath::max(aa,0.0);
00128
00129 Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
00130 }
00131 }
00132 Fzp += rho2*Wzp.squaredNorm();
00133
00134
00135 delta_Wzp = Wzp - Ws;
00136 delta_Czp = Czp - Cs;
00137
00138
00139 double nrm_delta_Wzp = delta_Wzp.squaredNorm();
00140 double nrm_delta_Czp = delta_Czp.squaredNorm();
00141
00142 double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;
00143
00144 double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
00145 (delta_Czp.transpose()*gCs).trace() +
00146 (gamma/2)*nrm_delta_Wzp +
00147 (gamma/2)*nrm_delta_Czp;
00148
00149
00150 if (r_sum <= 1e-20)
00151 {
00152 SG_SDEBUG("Line search point is too close to search point\n");
00153 done = true;
00154 break;
00155 }
00156
00157
00158 if (Fzp <= Fzp_gamma)
00159 break;
00160 else
00161 gamma *= gamma_inc;
00162
00163 inner_iter++;
00164 }
00165
00166 Wz_old = Wz;
00167 Cz_old = Cz;
00168 Wz = Wzp;
00169 Cz = Czp;
00170
00171
00172 obj_old = obj;
00173 obj = Fzp;
00174 for (int i=0; i<n_feats; i++)
00175 obj += rho1*(Wz.row(i).lpNorm<2>());
00176
00177
00178 SG_SDEBUG("Obj = %f\n",obj);
00179
00180
00181 switch (options.termination)
00182 {
00183 case 0:
00184 if (iter>=2)
00185 {
00186 if ( CMath::abs(obj-obj_old) <= options.tolerance )
00187 {
00188 SG_SDEBUG("Objective changes less than tolerance\n");
00189 done = true;
00190 }
00191 }
00192 break;
00193 case 1:
00194 if (iter>=2)
00195 {
00196 if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
00197 done = true;
00198 }
00199 break;
00200 case 2:
00201 if (CMath::abs(obj) <= options.tolerance)
00202 done = true;
00203 break;
00204 case 3:
00205 if (iter>=options.max_iter)
00206 done = true;
00207 break;
00208 }
00209
00210 iter++;
00211 t_old = t;
00212 t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
00213 }
00214 internal::set_is_malloc_allowed(true);
00215 SG_SDONE();
00216 SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);
00217
00218 SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
00219 for (int i=0; i<n_feats; i++)
00220 {
00221 for (task=0; task<n_tasks; task++)
00222 tasks_w(i,task) = Wzp(i,task);
00223 }
00224 SGVector<float64_t> tasks_c(n_tasks);
00225 for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
00226 return malsar_result_t(tasks_w, tasks_c);
00227 };
00228 };
00229 #endif