SHOGUN 4.2.0
malsar_joint_feature_learning.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Sergey Lisitsyn
 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
 */

#include <shogun/lib/malsar/malsar_joint_feature_learning.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <iostream>

using namespace Eigen;
using namespace std;

namespace shogun
{

malsar_result_t malsar_joint_feature_learning(
	CDotFeatures* features,
	double* y,
	double rho1,
	double rho2,
	const malsar_options& options)
{
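	// minimizes, over per-task weights W (n_feats x n_tasks) and biases c,
	//   sum_t mean_i log(1 + exp(-y_i*(w_t.dot(x_i) + c_t)))
	//     + rho1*||W||_{2,1} + rho2*||W||_F^2
	// via accelerated proximal gradient descent (FISTA-style updates)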
	int task;
	int n_feats = features->get_dim_feature_space();
	SG_SDEBUG("n feats = %d\n", n_feats)
	int n_vecs = features->get_num_vectors();
	SG_SDEBUG("n vecs = %d\n", n_vecs)
	int n_tasks = options.n_tasks;
	SG_SDEBUG("n tasks = %d\n", n_tasks)

	int iter = 0;

	// initialize weight vector and bias for each task
	MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
	VectorXd Cs = VectorXd::Zero(n_tasks);
	MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
	VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;

	double t=1, t_old=0;
	double gamma=1, gamma_inc=2;
	double obj=0.0, obj_old=0.0;

	//internal::set_is_malloc_allowed(false);
	bool done = false;
	while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
	{
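		// FISTA momentum coefficient; the search point below extrapolates
		// from the last two accepted iterates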
		double alpha = double(t_old - 1)/t;

		// compute search point
		Ws = (1+alpha)*Wz - alpha*Wz_old;
		Cs = (1+alpha)*Cz - alpha*Cz_old;

		// zero gradient
		gWs.setZero();
		gCs.setZero();

		// compute gradient and objective at search point
		double Fs = 0;
		for (task=0; task<n_tasks; task++)
		{
			SGVector<index_t> task_idx = options.tasks_indices[task];
			int n_task_vecs = task_idx.vlen;
			for (int i=0; i<n_task_vecs; i++)
			{
				double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
				double bb = CMath::max(aa,0.0);

				// avoid underflow when computing exponential loss
				Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
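				// b: derivative of the averaged logistic loss with respect
				// to the linear response w_t.dot(x_i) + c_t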
				double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;

				gCs[task] += b;
				features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
			}
		}
		gWs.noalias() += 2*rho2*Ws;

		// add regularizer (scaled by rho2, matching the gradient and Fzp below)
		Fs += rho2*Ws.squaredNorm();

		//cout << "gWs" << endl << gWs << endl;
		//cout << "gCs" << endl << gCs << endl;
		//SG_SPRINT("Fs = %f\n",Fs)

		double Fzp = 0.0;

		int inner_iter = 0;
		// line search, Armijo-Goldstein scheme
		while (inner_iter <= 1000)
		{
			// compute lasso projection of Ws - gWs/gamma
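			// (row-wise proximal operator of the l2,1 norm: shrink the
			// euclidean norm of each feature row by rho1/gamma, zeroing
			// rows that fall below the threshold)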
			for (int i=0; i<n_feats; i++)
			{
				Wzp.row(i).noalias() = Ws.row(i) - gWs.row(i)/gamma;
				double norm = Wzp.row(i).lpNorm<2>();
				if (norm == 0.0)
					Wzp.row(i).setZero();
				else
				{
					double threshold = norm - rho1/gamma;
					if (threshold < 0.0)
						Wzp.row(i).setZero();
					else
						Wzp.row(i) *= threshold/norm;
				}
			}
			// walk in direction of antigradient
			Czp = Cs - gCs/gamma;

			// compute objective at line search point
			Fzp = 0.0;
			for (task=0; task<n_tasks; task++)
			{
				SGVector<index_t> task_idx = options.tasks_indices[task];
				int n_task_vecs = task_idx.vlen;
				for (int i=0; i<n_task_vecs; i++)
				{
					double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
					double bb = CMath::max(aa,0.0);

					Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
				}
			}
			Fzp += rho2*Wzp.squaredNorm();

			// compute delta between line search point and search point
			delta_Wzp = Wzp - Ws;
			delta_Czp = Czp - Cs;

			// squared norms of delta
			double nrm_delta_Wzp = delta_Wzp.squaredNorm();
			double nrm_delta_Czp = delta_Czp.squaredNorm();

			double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;

			double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
				(delta_Czp.transpose()*gCs).trace() +
				(gamma/2)*nrm_delta_Wzp +
				(gamma/2)*nrm_delta_Czp;
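			// Fzp_gamma is the quadratic upper-bound model of the smooth
			// objective around the search point; the step 1/gamma is
			// accepted once Fzp <= Fzp_gamma (checked below)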

			// break if delta is getting too small
			if (r_sum <= 1e-20)
			{
				SG_SDEBUG("Line search point is too close to search point\n")
				done = true;
				break;
			}

			// break if objective at line search point is smaller than Fzp_gamma
			if (Fzp <= Fzp_gamma)
				break;
			else
				gamma *= gamma_inc;

			inner_iter++;
		}

		Wz_old = Wz;
		Cz_old = Cz;
		Wz = Wzp;
		Cz = Czp;

		// compute objective value
		obj_old = obj;
		obj = Fzp;
		for (int i=0; i<n_feats; i++)
			obj += rho1*(Wz.row(i).lpNorm<2>());
		//for (task=0; task<n_tasks; task++)
		//	obj += rho1*(Wz.col(task).norm());
		SG_SDEBUG("Obj = %f\n",obj)
		//SG_SABS_PROGRESS(obj,0.0)
		// check if process should be terminated
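		// (0: absolute change of objective, 1: relative change,
		//  2: absolute objective value, 3: iteration count)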
		switch (options.termination)
		{
			case 0:
				if (iter>=2)
				{
					if (CMath::abs(obj-obj_old) <= options.tolerance)
					{
						SG_SDEBUG("Objective changes less than tolerance\n")
						done = true;
					}
				}
				break;
			case 1:
				if (iter>=2)
				{
					if (CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
						done = true;
				}
				break;
			case 2:
				if (CMath::abs(obj) <= options.tolerance)
					done = true;
				break;
			case 3:
				if (iter>=options.max_iter)
					done = true;
				break;
		}

		iter++;
		t_old = t;
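		// FISTA step-size sequence: t_{k+1} = (1 + sqrt(1 + 4*t_k^2))/2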
		t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
	}
	//internal::set_is_malloc_allowed(true);
	SG_SDONE()
	SG_SDEBUG("%d iterations passed, objective = %f\n",iter,obj)

	SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
	for (int i=0; i<n_feats; i++)
	{
		for (task=0; task<n_tasks; task++)
			tasks_w(i,task) = Wzp(i,task);
	}
	SGVector<float64_t> tasks_c(n_tasks);
	for (int i=0; i<n_tasks; i++)
		tasks_c[i] = Czp[i];
	return malsar_result_t(tasks_w, tasks_c);
}
} // namespace shogun

#endif //USE_GPL_SHOGUN
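
For reference, a minimal sketch of how this solver might be invoked. The toy data, regularization strengths, and task split below are made up for illustration; malsar_options::default_options() and the result fields w and c are assumed to match the declarations in the companion malsar_options.h header, while the option fields set explicitly (n_tasks, tasks_indices, max_iter, tolerance, termination) are exactly the ones read by the code above.

#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/malsar/malsar_joint_feature_learning.h>

using namespace shogun;

void joint_feature_learning_example()
{
	// six 2-d examples stored column-wise (toy data for illustration only)
	SGMatrix<float64_t> X(2, 6);
	for (int i=0; i<12; i++)
		X.matrix[i] = (i%3==0) ? 1.0 : -0.5;
	double y[6] = {+1, -1, +1, -1, +1, -1};

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);

	// examples 0..2 belong to task 0, examples 3..5 to task 1
	SGVector<index_t> tasks[2] = {SGVector<index_t>(3), SGVector<index_t>(3)};
	for (int i=0; i<3; i++)
	{
		tasks[0][i] = i;
		tasks[1][i] = 3+i;
	}

	malsar_options options = malsar_options::default_options(); // assumed helper
	options.n_tasks = 2;
	options.tasks_indices = tasks;
	options.max_iter = 100;
	options.tolerance = 1e-4;
	options.termination = 0; // stop on small absolute change of the objective

	malsar_result_t result =
		malsar_joint_feature_learning(features, y, 0.1 /*rho1*/, 0.01 /*rho2*/, options);

	// result.w: n_feats x n_tasks weights; result.c: per-task biases (assumed field names)
	SG_UNREF(features);
}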