SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
malsar_clustered.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
9  */
10 
12 #ifdef HAVE_EIGEN3
15 #include <iostream>
17 
18 using namespace Eigen;
19 using namespace std;
20 
21 namespace shogun
22 {
23 
24 static double* H_diag_matrix;
25 static int H_diag_matrix_ld;
26 
27 static const double* get_col(uint32_t j)
28 {
30 }
31 
32 malsar_result_t malsar_clustered(
33  CDotFeatures* features,
34  double* y,
35  double rho1,
36  double rho2,
37  const malsar_options& options)
38 {
39  int task;
40  int n_feats = features->get_dim_feature_space();
41  SG_SDEBUG("n feats = %d\n", n_feats);
42  int n_vecs = features->get_num_vectors();
43  SG_SDEBUG("n vecs = %d\n", n_vecs);
44  int n_tasks = options.n_tasks;
45  SG_SDEBUG("n tasks = %d\n", n_tasks);
46 
47  H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks);
48  for (int i=0; i<n_tasks; i++)
49  H_diag_matrix[i*n_tasks+i] = 2.0;
50  H_diag_matrix_ld = n_tasks;
51 
52  int iter = 0;
53 
54  // initialize weight vector and bias for each task
55  MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
56  VectorXd Cs = VectorXd::Zero(n_tasks);
57  MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks;
58  MatrixXd IM = Ms;
59  MatrixXd IMsqinv = Ms;
60  MatrixXd invEtaMWt = Ms;
61 
62  MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
63  VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
64  MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms;
65  MatrixXd Mzp_Pz;
66 
67  double eta = rho2/rho1;
68  double c = rho1*eta*(1+eta);
69 
70  double t=1, t_old=0;
71  double gamma=1, gamma_inc=2;
72  double obj=0.0, obj_old=0.0;
73 
74  double* diag_H = SG_MALLOC(double, n_tasks);
75  double* f = SG_MALLOC(double, n_tasks);
76  double* a = SG_MALLOC(double, n_tasks);
77  double* lb = SG_MALLOC(double, n_tasks);
78  double* ub = SG_MALLOC(double, n_tasks);
79  double* x = SG_CALLOC(double, n_tasks);
80 
81  internal::set_is_malloc_allowed(false);
82  bool done = false;
83  while (!done && iter <= options.max_iter)
84  {
85  double alpha = double(t_old - 1)/t;
86  SG_SDEBUG("alpha=%f\n",alpha);
87 
88  // compute search point
89  Ws = (1+alpha)*Wz - alpha*Wz_old;
90  Cs = (1+alpha)*Cz - alpha*Cz_old;
91  Ms = (1+alpha)*Mz - alpha*Mz_old;
92 
93  // zero gradient
94  gWs.setZero();
95  gCs.setZero();
96  internal::set_is_malloc_allowed(true);
97  SG_SDEBUG("Computing gradient\n");
98  IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms);
99  // cout << "M" << endl << Ms << endl;
100  // cout << "IM" << endl << IM << endl;
101  IMsqinv = (IM*IM).inverse();
102  invEtaMWt = IM.inverse()*Ws.transpose();
103  //cout << "invEtaMWt" << endl << invEtaMWt << endl;
104  gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv;
105  gWs.noalias() += 2*c*invEtaMWt.transpose();
106  internal::set_is_malloc_allowed(false);
107 
108  // compute gradient and objective at search point
109  double Fs = 0;
110  for (task=0; task<n_tasks; task++)
111  {
112  SGVector<index_t> task_idx = options.tasks_indices[task];
113  int n_vecs_task = task_idx.vlen;
114  for (int i=0; i<n_vecs_task; i++)
115  {
116  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
117  double bb = CMath::max(aa,0.0);
118 
119  // avoid underflow when computing exponential loss
120  Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
121  double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task;
122  gCs[task] += b;
123  features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
124  }
125  }
126  //cout << "gWs" << endl << gWs << endl;
127  //cout << "gCs" << endl << gCs << endl;
128  SG_SDEBUG("Computed gradient\n");
129 
130  // add regularizer
131  Fs += c*(Ws*invEtaMWt).trace();
132  SG_SDEBUG("Fs = %f \n", Fs);
133 
134  double Fzp = 0.0;
135 
136  int inner_iter = 0;
137  // line search, Armijo-Goldstein scheme
138  while (inner_iter <= 1000)
139  {
140  Wzp = Ws - gWs/gamma;
141  Czp = Cs - gCs/gamma;
142  // compute singular projection of Ms - gMs/gamma with k
143  internal::set_is_malloc_allowed(true);
144  EigenSolver<MatrixXd> eigensolver(Ms-gMs/gamma);
145 
146  // solve problem
147  // min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1
148  for (int i=0; i<n_tasks; i++)
149  {
150  diag_H[i] = 2.0;
151  f[i] = -2*eigensolver.eigenvalues()[i].real();
152  SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real());
153  a[i] = 1.0;
154  lb[i] = 0.0;
155  ub[i] = 1.0;
156  x[i] = double(options.n_clusters)/n_tasks;
157  }
158  double b = options.n_clusters;//eigensolver.eigenvalues().sum().real();
159  SG_SDEBUG("b = %f\n", b);
160  SG_SDEBUG("Calling libqp\n");
161  libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL);
162  SG_SDEBUG("Libqp objective = %f\n",problem_state.QP);
163  SG_SDEBUG("Exit code = %d\n",problem_state.exitflag);
164  SG_SDEBUG("%d iteration passed\n",problem_state.nIter);
165  SG_SDEBUG("Solution is \n [ ");
166  for (int i=0; i<n_tasks; i++)
167  SG_SDEBUG("%f ", x[i]);
168  SG_SDEBUG("]\n");
169  Map<VectorXd> Mzp_DiagSigz(x,n_tasks);
170  Mzp_Pz = eigensolver.eigenvectors().real();
171  Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose();
172  internal::set_is_malloc_allowed(false);
173  // walk in direction of antigradient
174  for (int i=0; i<n_tasks; i++)
175  Mzp_DiagSigz[i] += eta;
176  internal::set_is_malloc_allowed(true);
177  invEtaMWt = (Mzp_Pz*
178  (Mzp_DiagSigz.cwiseInverse().asDiagonal())*
179  Mzp_Pz.transpose())*
180  Wzp.transpose();
181  internal::set_is_malloc_allowed(false);
182  // compute objective at line search point
183  Fzp = 0.0;
184  for (task=0; task<n_tasks; task++)
185  {
186  SGVector<index_t> task_idx = options.tasks_indices[task];
187  int n_vecs_task = task_idx.vlen;
188  for (int i=0; i<n_vecs_task; i++)
189  {
190  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
191  double bb = CMath::max(aa,0.0);
192 
193  Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
194  }
195  }
196  Fzp += c*(Wzp*invEtaMWt).trace();
197 
198  // compute delta between line search point and search point
199  delta_Wzp = Wzp - Ws;
200  delta_Czp = Czp - Cs;
201  delta_Mzp = Mzp - Ms;
202 
203  // norms of delta
204  double nrm_delta_Wzp = delta_Wzp.squaredNorm();
205  double nrm_delta_Czp = delta_Czp.squaredNorm();
206  double nrm_delta_Mzp = delta_Mzp.squaredNorm();
207 
208  double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3;
209 
210  double Fzp_gamma = 0.0;
211  if (n_feats > n_tasks)
212  {
213  Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
214  (delta_Czp.transpose()*gCs).trace() +
215  (delta_Mzp.transpose()*gMs).trace() +
216  (gamma/2)*nrm_delta_Wzp +
217  (gamma/2)*nrm_delta_Czp +
218  (gamma/2)*nrm_delta_Mzp;
219  }
220  else
221  {
222  Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() +
223  (gCs.transpose()*delta_Czp).trace() +
224  (gMs.transpose()*delta_Mzp).trace() +
225  (gamma/2)*nrm_delta_Wzp +
226  (gamma/2)*nrm_delta_Czp +
227  (gamma/2)*nrm_delta_Mzp;
228  }
229 
230  // break if delta is getting too small
231  if (r_sum <= 1e-20)
232  {
233  done = true;
234  break;
235  }
236 
237  // break if objective at line searc point is smaller than Fzp_gamma
238  if (Fzp <= Fzp_gamma)
239  break;
240  else
241  gamma *= gamma_inc;
242 
243  inner_iter++;
244  }
245 
246  Wz_old = Wz;
247  Cz_old = Cz;
248  Mz_old = Mz;
249  Wz = Wzp;
250  Cz = Czp;
251  Mz = Mzp;
252 
253  // compute objective value
254  obj_old = obj;
255  obj = Fzp;
256 
257  // check if process should be terminated
258  switch (options.termination)
259  {
260  case 0:
261  if (iter>=2)
262  {
263  if ( CMath::abs(obj-obj_old) <= options.tolerance )
264  done = true;
265  }
266  break;
267  case 1:
268  if (iter>=2)
269  {
270  if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
271  done = true;
272  }
273  break;
274  case 2:
275  if (CMath::abs(obj) <= options.tolerance)
276  done = true;
277  break;
278  case 3:
279  if (iter>=options.max_iter)
280  done = true;
281  break;
282  }
283 
284  iter++;
285  t_old = t;
286  t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
287  }
288  internal::set_is_malloc_allowed(true);
289  SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj);
290 
292  SG_FREE(diag_H);
293  SG_FREE(f);
294  SG_FREE(a);
295  SG_FREE(lb);
296  SG_FREE(ub);
297  SG_FREE(x);
298 
299  SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
300  for (int i=0; i<n_feats; i++)
301  {
302  for (task=0; task<n_tasks; task++)
303  tasks_w(i,task) = Wzp(i,task);
304  }
305  //tasks_w.display_matrix();
306  SGVector<float64_t> tasks_c(n_tasks);
307  for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
308  return malsar_result_t(tasks_w, tasks_c);
309 };
310 };
311 #endif

SHOGUN Machine Learning Toolbox - Documentation