SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
malsar_low_rank.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2012 Jiayu Zhou and Jieping Ye
9  */
10 
12 #ifdef HAVE_EIGEN3
15 #include <iostream>
16 
17 using namespace Eigen;
18 
19 namespace shogun
20 {
21 
22 malsar_result_t malsar_low_rank(
23  CDotFeatures* features,
24  double* y,
25  double rho,
26  const malsar_options& options)
27 {
28  int task;
29  int n_feats = features->get_dim_feature_space();
30  SG_SDEBUG("n feats = %d\n", n_feats)
31  int n_vecs = features->get_num_vectors();
32  SG_SDEBUG("n vecs = %d\n", n_vecs)
33  int n_tasks = options.n_tasks;
34  SG_SDEBUG("n tasks = %d\n", n_tasks)
35 
36  int iter = 0;
37 
38  // initialize weight vector and bias for each task
39  MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks);
40  VectorXd Cs = VectorXd::Zero(n_tasks);
41  MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws;
42  VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs;
43 
44  double t=1, t_old=0;
45  double gamma=1, gamma_inc=2;
46  double obj=0.0, obj_old=0.0;
47 
48  double rho_L2 = 0.0;
49 
50  //internal::set_is_malloc_allowed(false);
51  bool done = false;
52  while (!done && iter <= options.max_iter)
53  {
54  double alpha = double(t_old - 1)/t;
55 
56  // compute search point
57  Ws = (1+alpha)*Wz - alpha*Wz_old;
58  Cs = (1+alpha)*Cz - alpha*Cz_old;
59 
60  // zero gradient
61  gWs.setZero();
62  gCs.setZero();
63 
64  // compute gradient and objective at search point
65  double Fs = 0;
66  for (task=0; task<n_tasks; task++)
67  {
68  SGVector<index_t> task_idx = options.tasks_indices[task];
69  int n_task_vecs = task_idx.vlen;
70  for (int i=0; i<n_task_vecs; i++)
71  {
72  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]);
73  double bb = CMath::max(aa,0.0);
74 
75  // avoid underflow when computing exponential loss
76  Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
77  double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs;
78 
79  gCs[task] += b;
80  features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats);
81  }
82  }
83  gWs.noalias() += 2*rho_L2*Ws;
84  //SG_SDEBUG("gWs=%f\n",gWs.squaredNorm())
85 
86  // add regularizer
87  Fs += rho_L2*Ws.squaredNorm();
88 
89  double Fzp = 0.0;
90 
91  int inner_iter = 0;
92  // line search, Armijo-Goldstein scheme
93  while (inner_iter <= 1000)
94  {
95  // compute trace projection of Ws - gWs/gamma with 2*rho/gamma
96  //internal::set_is_malloc_allowed(true);
97  Wzp.setZero();
98  JacobiSVD<MatrixXd> svd((Ws - gWs/gamma).transpose(),ComputeThinU | ComputeThinV);
99  for (int i=0; i<svd.singularValues().size(); i++)
100  {
101  if (svd.singularValues()[i] > rho/gamma)
102  Wzp += (svd.matrixU().col(i)*
103  svd.singularValues()[i]*
104  svd.matrixV().col(i).transpose()).transpose();
105  }
106  //internal::set_is_malloc_allowed(false);
107  // walk in direction of antigradient
108  Czp = Cs - gCs/gamma;
109 
110  // compute objective at line search point
111  Fzp = 0.0;
112  for (task=0; task<n_tasks; task++)
113  {
114  SGVector<index_t> task_idx = options.tasks_indices[task];
115  int n_task_vecs = task_idx.vlen;
116  for (int i=0; i<n_task_vecs; i++)
117  {
118  double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]);
119  double bb = CMath::max(aa,0.0);
120 
121  Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs;
122  }
123  }
124  Fzp += rho_L2*Wzp.squaredNorm();
125 
126  // compute delta between line search point and search point
127  delta_Wzp = Wzp - Ws;
128  delta_Czp = Czp - Cs;
129 
130  // norms of delta
131  double nrm_delta_Wzp = delta_Wzp.squaredNorm();
132  double nrm_delta_Czp = delta_Czp.squaredNorm();
133 
134  double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2;
135 
136  double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() +
137  (delta_Czp.transpose()*gCs).trace() +
138  (gamma/2)*nrm_delta_Wzp +
139  (gamma/2)*nrm_delta_Czp;
140 
141  // break if delta is getting too small
142  if (r_sum <= 1e-20)
143  {
144  done = true;
145  break;
146  }
147 
148  // break if objective at line search point is smaller than Fzp_gamma
149  if (Fzp <= Fzp_gamma)
150  break;
151  else
152  gamma *= gamma_inc;
153  }
154 
155  Wz_old = Wz;
156  Cz_old = Cz;
157  Wz = Wzp;
158  Cz = Czp;
159 
160  // compute objective value
161  obj_old = obj;
162  obj = Fzp;
163  //internal::set_is_malloc_allowed(true);
164  JacobiSVD<MatrixXd> svd(Wzp, EigenvaluesOnly);
165  obj += rho*svd.singularValues().sum();
166  //internal::set_is_malloc_allowed(false);
167 
168 
169  // check if process should be terminated
170  switch (options.termination)
171  {
172  case 0:
173  if (iter>=2)
174  {
175  if ( CMath::abs(obj-obj_old) <= options.tolerance )
176  done = true;
177  }
178  break;
179  case 1:
180  if (iter>=2)
181  {
182  if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old))
183  done = true;
184  }
185  break;
186  case 2:
187  if (CMath::abs(obj) <= options.tolerance)
188  done = true;
189  break;
190  case 3:
191  if (iter>=options.max_iter)
192  done = true;
193  break;
194  }
195 
196  iter++;
197  t_old = t;
198  t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t));
199  }
200  //internal::set_is_malloc_allowed(true);
201  SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj)
202 
203  SGMatrix<float64_t> tasks_w(n_feats, n_tasks);
204  for (int i=0; i<n_feats; i++)
205  {
206  for (task=0; task<n_tasks; task++)
207  tasks_w(i,task) = Wzp(i,task);
208  }
209  SGVector<float64_t> tasks_c(n_tasks);
210  for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i];
211  return malsar_result_t(tasks_w, tasks_c);
212 };
213 };
214 #endif

SHOGUN Machine Learning Toolbox - Documentation