SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
malsar_joint_feature_learning.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
9  */
10 
12 #ifdef HAVE_EIGEN3
13 #include <shogun/lib/Signal.h>
16 #include <iostream>
17 
18 using namespace Eigen;
19 using namespace std;
20 
21 namespace shogun
22 {
23 
25  CDotFeatures* features,
26  double* y,
27  double rho1,
28  double rho2,
29  const malsar_options& options)
30 {
31  int task;
32  int n_feats = features->get_dim_feature_space();
33  SG_SDEBUG("n feats = %d\n", n_feats)
34  int n_vecs = features->get_num_vectors();
35  SG_SDEBUG("n vecs = %d\n", n_vecs)
36  int n_tasks = options.n_tasks;
37  SG_SDEBUG("n tasks = %d\n", n_tasks)
38 
39  int iter = 0;
40 
41  // initialize weight vector and bias for each task
42  MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
43  VectorXd Cs = VectorXd::Zero(n_tasks);
44  MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
45  VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
46 
47  double t=1, t_old=0;
48  double gamma=1, gamma_inc=2;
49  double obj=0.0, obj_old=0.0;
50 
51  //internal::set_is_malloc_allowed(false);
52  bool done = false;
53  while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
54  {
55  double alpha = double(t_old - 1)/t;
56 
57  // compute search point
58  Ws = (1+alpha)*Wz - alpha*Wz_old;
59  Cs = (1+alpha)*Cz - alpha*Cz_old;
60 
61  // zero gradient
62  gWs.setZero();
63  gCs.setZero();
64 
65  // compute gradient and objective at search point
66  double Fs = 0;
67  for (task=0; task<n_tasks; task++)
68  {
69  SGVector<index_t> task_idx = options.tasks_indices[task];
70  int n_task_vecs = task_idx.vlen;
71  for (int i=0; i<n_task_vecs; i++)
72  {
73  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
74  double bb = CMath::max(aa,0.0);
75 
76  // avoid underflow when computing exponential loss
77  Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
78  double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;
79 
80  gCs[task] += b;
81  features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
82  }
83  }
84  gWs.noalias() += 2*rho2*Ws;
85 
86  // add regularizer
87  Fs += Ws.squaredNorm();
88 
89  //cout << "gWs" << endl << gWs << endl;
90  //cout << "gCs" << endl << gCs << endl;
91  //SG_SPRINT("Fs = %f\n",Fs)
92 
93  double Fzp = 0.0;
94 
95  int inner_iter = 0;
96  // line search, Armijo-Goldstein scheme
97  while (inner_iter <= 1000)
98  {
99  // compute lasso projection of Ws - gWs/gamma
100  for (int i=0; i<n_feats; i++)
101  {
102  Wzp.row(i).noalias() = Ws.row(i) - gWs.row(i)/gamma;
103  double norm = Wzp.row(i).lpNorm<2>();
104  if (norm == 0.0)
105  Wzp.row(i).setZero();
106  else
107  {
108  double threshold = norm - rho1/gamma;
109  if (threshold < 0.0)
110  Wzp.row(i).setZero();
111  else
112  Wzp.row(i) *= threshold/norm;
113  }
114  }
115  // walk in direction of antigradient
116  Czp = Cs - gCs/gamma;
117 
118  // compute objective at line search point
119  Fzp = 0.0;
120  for (task=0; task<n_tasks; task++)
121  {
122  SGVector<index_t> task_idx = options.tasks_indices[task];
123  int n_task_vecs = task_idx.vlen;
124  for (int i=0; i<n_task_vecs; i++)
125  {
126  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
127  double bb = CMath::max(aa,0.0);
128 
129  Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
130  }
131  }
132  Fzp += rho2*Wzp.squaredNorm();
133 
134  // compute delta between line search point and search point
135  delta_Wzp = Wzp - Ws;
136  delta_Czp = Czp - Cs;
137 
138  // norms of delta
139  double nrm_delta_Wzp = delta_Wzp.squaredNorm();
140  double nrm_delta_Czp = delta_Czp.squaredNorm();
141 
142  double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;
143 
144  double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
145  (delta_Czp.transpose()*gCs).trace() +
146  (gamma/2)*nrm_delta_Wzp +
147  (gamma/2)*nrm_delta_Czp;
148 
149  // break if delta is getting too small
150  if (r_sum <= 1e-20)
151  {
152  SG_SDEBUG("Line search point is too close to search point\n")
153  done = true;
154  break;
155  }
156 
157  // break if objective at line searc point is smaller than Fzp_gamma
158  if (Fzp <= Fzp_gamma)
159  break;
160  else
161  gamma *= gamma_inc;
162 
163  inner_iter++;
164  }
165 
166  Wz_old = Wz;
167  Cz_old = Cz;
168  Wz = Wzp;
169  Cz = Czp;
170 
171  // compute objective value
172  obj_old = obj;
173  obj = Fzp;
174  for (int i=0; i<n_feats; i++)
175  obj += rho1*(Wz.row(i).lpNorm<2>());
176  //for (task=0; task<n_tasks; task++)
177  // obj += rho1*(Wz.col(task).norm());
178  SG_SDEBUG("Obj = %f\n",obj)
179  //SG_SABS_PROGRESS(obj,0.0)
180  // check if process should be terminated
181  switch (options.termination)
182  {
183  case 0:
184  if (iter>=2)
185  {
186  if ( CMath::abs(obj-obj_old) <= options.tolerance )
187  {
188  SG_SDEBUG("Objective changes less than tolerance\n")
189  done = true;
190  }
191  }
192  break;
193  case 1:
194  if (iter>=2)
195  {
196  if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
197  done = true;
198  }
199  break;
200  case 2:
201  if (CMath::abs(obj) <= options.tolerance)
202  done = true;
203  break;
204  case 3:
205  if (iter>=options.max_iter)
206  done = true;
207  break;
208  }
209 
210  iter++;
211  t_old = t;
212  t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
213  }
214  //internal::set_is_malloc_allowed(true);
215  SG_SDONE()
216  SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj)
217 
218  SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
219  for (int i=0; i<n_feats; i++)
220  {
221  for (task=0; task<n_tasks; task++)
222  tasks_w(i,task) = Wzp(i,task);
223  }
224  SGVector<float64_t> tasks_c(n_tasks);
225  for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
226  return malsar_result_t(tasks_w, tasks_c);
227 };
228 };
229 #endif

SHOGUN Machine Learning Toolbox - Documentation