SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
malsar_clustered.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
9  */
10 
12 #ifdef USE_GPL_SHOGUN
13 
14 #ifndef HAVE_CXX11
17 #include <iostream>
18 #include <shogun/lib/external/libqp.h>
19 
20 using namespace Eigen;
21 using namespace std;
22 
23 namespace shogun
24 {
25 
26 static double* H_diag_matrix;
27 static int H_diag_matrix_ld;
28 
29 static const double* get_col(uint32_t j)
30 {
31  return H_diag_matrix + j*H_diag_matrix_ld;
32 }
33 
34 malsar_result_t malsar_clustered(
35  CDotFeatures* features,
36  double* y,
37  double rho1,
38  double rho2,
39  const malsar_options& options)
40 {
41  int task;
42  int n_feats = features->get_dim_feature_space();
43  SG_SDEBUG("n feats = %d\n", n_feats)
44  int n_vecs = features->get_num_vectors();
45  SG_SDEBUG("n vecs = %d\n", n_vecs)
46  int n_tasks = options.n_tasks;
47  SG_SDEBUG("n tasks = %d\n", n_tasks)
48 
49  H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks);
50  for (int i=0; i<n_tasks; i++)
51  H_diag_matrix[i*n_tasks+i] = 2.0;
52  H_diag_matrix_ld = n_tasks;
53 
54  int iter = 0;
55 
56  // initialize weight vector and bias for each task
57  MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
58  VectorXd Cs = VectorXd::Zero(n_tasks);
59  MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks;
60  MatrixXd IM = Ms;
61  MatrixXd IMsqinv = Ms;
62  MatrixXd invEtaMWt = Ms;
63 
64  MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
65  VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
66  MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms;
67  MatrixXd Mzp_Pz;
68 
69  double eta = rho2/rho1;
70  double c = rho1*eta*(1+eta);
71 
72  double t=1, t_old=0;
73  double gamma=1, gamma_inc=2;
74  double obj=0.0, obj_old=0.0;
75 
76  double* diag_H = SG_MALLOC(double, n_tasks);
77  double* f = SG_MALLOC(double, n_tasks);
78  double* a = SG_MALLOC(double, n_tasks);
79  double* lb = SG_MALLOC(double, n_tasks);
80  double* ub = SG_MALLOC(double, n_tasks);
81  double* x = SG_CALLOC(double, n_tasks);
82 
83  //internal::set_is_malloc_allowed(false);
84  bool done = false;
85  while (!done && iter <= options.max_iter)
86  {
87  double alpha = double(t_old - 1)/t;
88  SG_SDEBUG("alpha=%f\n",alpha)
89 
90  // compute search point
91  Ws = (1+alpha)*Wz - alpha*Wz_old;
92  Cs = (1+alpha)*Cz - alpha*Cz_old;
93  Ms = (1+alpha)*Mz - alpha*Mz_old;
94 
95  // zero gradient
96  gWs.setZero();
97  gCs.setZero();
98  //internal::set_is_malloc_allowed(true);
99  SG_SDEBUG("Computing gradient\n")
100  IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms);
101  IMsqinv = (IM*IM).inverse();
102  invEtaMWt = IM.inverse()*Ws.transpose();
103  gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv;
104  gWs.noalias() += 2*c*invEtaMWt.transpose();
105  //internal::set_is_malloc_allowed(false);
106 
107  // compute gradient and objective at search point
108  double Fs = 0;
109  for (task=0; task<n_tasks; task++)
110  {
111  SGVector<index_t> task_idx = options.tasks_indices[task];
112  int n_vecs_task = task_idx.vlen;
113  for (int i=0; i<n_vecs_task; i++)
114  {
115  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
116  double bb = CMath::max(aa,0.0);
117 
118  // avoid underflow when computing exponential loss
119  Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
120  double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task;
121  gCs[task] += b;
122  features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
123  }
124  }
125  SG_SDEBUG("Computed gradient\n")
126 
127  // add regularizer
128  Fs += c*(Ws*invEtaMWt).trace();
129  SG_SDEBUG("Fs = %f \n", Fs)
130 
131  double Fzp = 0.0;
132 
133  int inner_iter = 0;
134  // line search, Armijo-Goldstein scheme
135  while (inner_iter <= 1)
136  {
137  Wzp = Ws - gWs/gamma;
138  Czp = Cs - gCs/gamma;
139  // compute singular projection of Ms - gMs/gamma with k
140  //internal::set_is_malloc_allowed(true);
141  EigenSolver<MatrixXd> eigensolver;
142  eigensolver.compute(Ms-gMs/gamma, true);
143  if (eigensolver.info()!=Eigen::Success)
144  SG_SERROR("Eigendecomposition failed")
145 
146  // solve problem
147  // min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1
148  for (int i=0; i<n_tasks; i++)
149  {
150  diag_H[i] = 2.0;
151  // TODO fails with C++11
152  //std::complex<MatrixXd::Scalar> eigenvalue = eigensolver.eigenvalues()[i];
153  //cout << "eigenvalue " << eigenvalue << "=" << std::real(eigenvalue) << "+i" << std::imag(eigenvalue) << endl;
154  f[i] = -2*eigensolver.eigenvalues()[i].real();
155  if (f[i]!=f[i])
156  SG_SERROR("NaN %d eigenvalue", i)
157 
158  SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real())
159  a[i] = 1.0;
160  lb[i] = 0.0;
161  ub[i] = 1.0;
162  x[i] = double(options.n_clusters)/n_tasks;
163  }
164  double b = options.n_clusters;//eigensolver.eigenvalues().sum().real();
165  SG_SDEBUG("b = %f\n", b)
166  SG_SDEBUG("Calling libqp\n")
167  libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL);
168  SG_SDEBUG("Libqp objective = %f\n",problem_state.QP)
169  SG_SDEBUG("Exit code = %d\n",problem_state.exitflag)
170  SG_SDEBUG("%d iteration passed\n",problem_state.nIter)
171  SG_SDEBUG("Solution is \n [ ")
172  for (int i=0; i<n_tasks; i++)
173  SG_SDEBUG("%f ", x[i])
174  SG_SDEBUG("]\n")
175  Map<VectorXd> Mzp_DiagSigz(x,n_tasks);
176  Mzp_Pz = eigensolver.eigenvectors().real();
177  Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose();
178  //internal::set_is_malloc_allowed(false);
179  // walk in direction of antigradient
180  for (int i=0; i<n_tasks; i++)
181  Mzp_DiagSigz[i] += eta;
182  //internal::set_is_malloc_allowed(true);
183  invEtaMWt = (Mzp_Pz*
184  (Mzp_DiagSigz.cwiseInverse().asDiagonal())*
185  Mzp_Pz.transpose())*
186  Wzp.transpose();
187  //internal::set_is_malloc_allowed(false);
188  // compute objective at line search point
189  Fzp = 0.0;
190  for (task=0; task<n_tasks; task++)
191  {
192  SGVector<index_t> task_idx = options.tasks_indices[task];
193  int n_vecs_task = task_idx.vlen;
194  for (int i=0; i<n_vecs_task; i++)
195  {
196  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]);
197  double bb = CMath::max(aa,0.0);
198 
199  Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task;
200  }
201  }
202  Fzp += c*(Wzp*invEtaMWt).trace();
203 
204  // compute delta between line search point and search point
205  delta_Wzp = Wzp - Ws;
206  delta_Czp = Czp - Cs;
207  delta_Mzp = Mzp - Ms;
208 
209  // norms of delta
210  double nrm_delta_Wzp = delta_Wzp.squaredNorm();
211  double nrm_delta_Czp = delta_Czp.squaredNorm();
212  double nrm_delta_Mzp = delta_Mzp.squaredNorm();
213 
214  double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3;
215 
216  double Fzp_gamma = 0.0;
217  if (n_feats > n_tasks)
218  {
219  Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
220  (delta_Czp.transpose()*gCs).trace() +
221  (delta_Mzp.transpose()*gMs).trace() +
222  (gamma/2)*nrm_delta_Wzp +
223  (gamma/2)*nrm_delta_Czp +
224  (gamma/2)*nrm_delta_Mzp;
225  }
226  else
227  {
228  Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() +
229  (gCs.transpose()*delta_Czp).trace() +
230  (gMs.transpose()*delta_Mzp).trace() +
231  (gamma/2)*nrm_delta_Wzp +
232  (gamma/2)*nrm_delta_Czp +
233  (gamma/2)*nrm_delta_Mzp;
234  }
235 
236  // break if delta is getting too small
237  if (r_sum <= 1e-20)
238  {
239  done = true;
240  break;
241  }
242 
243  // break if objective at line searc point is smaller than Fzp_gamma
244  if (Fzp <= Fzp_gamma)
245  break;
246  else
247  gamma *= gamma_inc;
248 
249  inner_iter++;
250  }
251 
252  Wz_old = Wz;
253  Cz_old = Cz;
254  Mz_old = Mz;
255  Wz = Wzp;
256  Cz = Czp;
257  Mz = Mzp;
258 
259  // compute objective value
260  obj_old = obj;
261  obj = Fzp;
262 
263  // check if process should be terminated
264  switch (options.termination)
265  {
266  case 0:
267  if (iter>=2)
268  {
269  if ( CMath::abs(obj-obj_old) <= options.tolerance )
270  done = true;
271  }
272  break;
273  case 1:
274  if (iter>=2)
275  {
276  if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
277  done = true;
278  }
279  break;
280  case 2:
281  if (CMath::abs(obj) <= options.tolerance)
282  done = true;
283  break;
284  case 3:
285  if (iter>=options.max_iter)
286  done = true;
287  break;
288  }
289 
290  iter++;
291  t_old = t;
292  t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
293  }
294  //internal::set_is_malloc_allowed(true);
295  SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj)
296 
297  SG_FREE(H_diag_matrix);
298  SG_FREE(diag_H);
299  SG_FREE(f);
300  SG_FREE(a);
301  SG_FREE(lb);
302  SG_FREE(ub);
303  SG_FREE(x);
304 
305  SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
306  for (int i=0; i<n_feats; i++)
307  {
308  for (task=0; task<n_tasks; task++)
309  tasks_w(i,task) = Wzp(i,task);
310  }
311  //tasks_w.display_matrix();
312  SGVector<float64_t> tasks_c(n_tasks);
313  for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
314  return malsar_result_t(tasks_w, tasks_c);
315 };
316 };
317 #endif
318 #endif //USE_GPL_SHOGUN
Definition: SGMatrix.h:20
Definition: basetag.h:132
double float64_t
Definition: common.h:50
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:168
#define SG_SERROR(...)
Definition: SGIO.h:179
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68

SHOGUN Machine Learning Toolbox - Documentation