SHOGUN 4.2.0
LeastAngleRegression.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Chiyuan Zhang
 * Copyright (C) 2012 Chiyuan Zhang
 */

#include <shogun/lib/config.h>

#include <vector>
#include <limits>
#include <algorithm>

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/regression/LeastAngleRegression.h>

using namespace Eigen;
using namespace shogun;
using namespace std;

CLeastAngleRegression::CLeastAngleRegression(bool lasso) :
	CLinearMachine(), m_lasso(lasso),
	m_max_nonz(0), m_max_l1_norm(0)
{
	m_epsilon = CMath::MACHINE_EPSILON;
	SG_ADD(&m_epsilon, "epsilon", "Epsilon for early stopping", MS_AVAILABLE);
	SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE);
	SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE);
}

CLeastAngleRegression::~CLeastAngleRegression()
{
}

template <typename ST>
void CLeastAngleRegression::find_max_abs(const std::vector<ST> &vec, const std::vector<bool> &ignore_mask,
	int32_t &imax, ST& vmax)
{
	imax = -1;
	vmax = -1;
	for (size_t i=0; i < vec.size(); ++i)
	{
		if (ignore_mask[i])
			continue;

		if (CMath::abs(vec[i]) > vmax)
		{
			vmax = CMath::abs(vec[i]);
			imax = i;
		}
	}
}

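// Compute a 2x2 Givens rotation G such that G * [x0; x1] = [r; 0] with
// r = sqrt(x0^2 + x1^2), and return r and 0 in y0 and y1. cholesky_delete()
// applies these rotations to zero the sub-diagonal entries that appear after
// a column of R is removed, restoring the upper-triangular form of the factor.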
template <typename ST>
void CLeastAngleRegression::plane_rot(ST x0, ST x1,
	ST &y0, ST &y1, SGMatrix<ST> &G)
{
	memset(G.matrix, 0, G.num_rows * G.num_cols * sizeof(ST));

	if (x1 == 0)
	{
		G(0, 0) = G(1, 1) = 1;
		y0 = x0;
		y1 = x1;
	}
	else
	{
		ST r = CMath::sqrt(x0*x0+x1*x1);
		ST sx0 = x0 / r;
		ST sx1 = x1 / r;

		G(0,0) = sx0;
		G(1,0) = -sx1;
		G(0,1) = sx1;
		G(1,1) = sx0;

		y0 = r;
		y1 = 0;
	}
}

bool CLeastAngleRegression::train_machine(CFeatures* data)
{
	REQUIRE(m_labels->get_label_type() == LT_REGRESSION, "Provided labels (%s) are of type (%d) - they should be regression labels (%d) instead.\n",
		m_labels->get_name(), m_labels->get_label_type(), LT_REGRESSION)

	if (!data)
	{
		REQUIRE(features, "No features provided.\n")
		REQUIRE(features->get_feature_class() == C_DENSE,
			"Feature-class (%d) must be of type C_DENSE (%d)\n", features->get_feature_class(), C_DENSE)

		data = features;
	}
	else
		REQUIRE(data->get_feature_class() == C_DENSE,
			"Feature-class (%d) must be of type C_DENSE (%d)\n", data->get_feature_class(), C_DENSE)

	REQUIRE(data->get_num_vectors() == m_labels->get_num_labels(), "Number of training vectors (%d) does not match number of labels (%d)\n",
		data->get_num_vectors(), m_labels->get_num_labels())

	// check the type of CFeatures, then call the appropriate template method
	if (data->get_feature_type() == F_DREAL)
		return CLeastAngleRegression::train_machine_templated((CDenseFeatures<float64_t>*) data);
	else if (data->get_feature_type() == F_SHORTREAL)
		return CLeastAngleRegression::train_machine_templated((CDenseFeatures<float32_t>*) data);
	else if (data->get_feature_type() == F_LONGREAL)
		return CLeastAngleRegression::train_machine_templated((CDenseFeatures<floatmax_t>*) data);
	else
		SG_ERROR("Feature-type (%d) must be of type F_SHORTREAL (%d), F_DREAL (%d) or F_LONGREAL (%d).\n",
			data->get_feature_type(), F_SHORTREAL, F_DREAL, F_LONGREAL)

	return false;
}

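// Least Angle Regression (Efron et al., 2004): starting from beta = 0, repeatedly
// add the feature most correlated with the current residual y - mu to the active
// set, then move all active coefficients along the equiangular direction (equal
// correlation with every active feature) until an inactive feature becomes equally
// correlated. With m_lasso enabled, the step is additionally truncated whenever an
// active coefficient would cross zero and that variable is dropped, so the computed
// path is the LASSO regularization path.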
template <typename ST>
bool CLeastAngleRegression::train_machine_templated(CDenseFeatures<ST> * data)
{
	std::vector<std::vector<ST>> m_beta_path_t;

	int32_t n_fea = data->get_num_features();
	int32_t n_vec = data->get_num_vectors();

	bool lasso_cond = false;
	bool stop_cond = false;

	// init facilities
	m_beta_idx.clear();
	m_beta_path_t.clear();
	m_num_active = 0;
	m_active_set.clear();
	m_is_active.resize(n_fea);
	fill(m_is_active.begin(), m_is_active.end(), false);

	SGVector<ST> y = ((CRegressionLabels*) m_labels)->template get_labels_t<ST>();
	typename SGVector<ST>::EigenVectorXtMap map_y(y.vector, y.size());

	// transpose(X) is more convenient to work with since we care
	// about features here. After transpose, each row will be a data
	// point while each column corresponds to a feature
	SGMatrix<ST> X(n_vec, n_fea);
	typename SGMatrix<ST>::EigenMatrixXtMap map_Xr = data->get_feature_matrix();
	typename SGMatrix<ST>::EigenMatrixXtMap map_X(X.matrix, n_vec, n_fea);
	map_X = map_Xr.transpose();

	SGMatrix<ST> X_active(n_vec, n_fea);

	// beta is the estimator
	vector<ST> beta(n_fea);

	vector<ST> Xy(n_fea);
	typename SGVector<ST>::EigenVectorXtMap map_Xy(&Xy[0], n_fea);
	// Xy = X' * y
	map_Xy = map_Xr*map_y;

	// mu is the prediction
	vector<ST> mu(n_vec);

	// correlation
	vector<ST> corr(n_fea);
	// sign of correlation
	vector<ST> corr_sign(n_fea);

	// Cholesky factorization R'R = X'X, R is upper triangular
	SGMatrix<ST> R;

	ST max_corr = 1;
	int32_t i_max_corr = 1;

	// first entry: all coefficients are zero
	m_beta_path_t.push_back(beta);
	m_beta_idx.push_back(0);

	// maximum allowed active variables at a time
	int32_t max_active_allowed = CMath::min(n_vec-1, n_fea);

	//========================================
	// main loop
	//========================================
	int32_t nloop=0;
	while (m_num_active < max_active_allowed && max_corr/n_vec > m_epsilon && !stop_cond)
	{
		// corr = X' * (y-mu) = - X'*mu + Xy
		typename SGVector<ST>::EigenVectorXtMap map_corr(&corr[0], n_fea);
		typename SGVector<ST>::EigenVectorXtMap map_mu(&mu[0], n_vec);

		map_corr = map_Xy - (map_Xr*map_mu);

		// corr_sign = sign(corr)
		for (size_t i=0; i < corr.size(); ++i)
			corr_sign[i] = CMath::sign(corr[i]);

		// find max absolute correlation in inactive set
		find_max_abs(corr, m_is_active, i_max_corr, max_corr);

		if (!lasso_cond)
		{
			// update Cholesky factorization matrix
			if (m_num_active == 0)
			{
				// R isn't allocated yet
				R = SGMatrix<ST>(1,1);
				ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));
				R(0,0) = CMath::sqrt(diag_k);
			}
			else
				R = cholesky_insert(X, X_active, R, i_max_corr, m_num_active);
			activate_variable(i_max_corr);
		}

		// Active variables
		typename SGMatrix<ST>::EigenMatrixXtMap map_Xa(X_active.matrix, n_vec, m_num_active);
		if (!lasso_cond)
			map_Xa.col(m_num_active-1) = map_X.col(i_max_corr);

		SGVector<ST> corr_sign_a(m_num_active);
		for (index_t i=0; i < m_num_active; ++i)
			corr_sign_a[i] = corr_sign[m_active_set[i]];

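		// Solve (X_A' X_A) GA1 = corr_sign_a via two triangular solves with the
		// Cholesky factor R (R'R = X_A' X_A). With AA = 1/sqrt(corr_sign_a' * GA1),
		// the weights wA = AA * GA1 give the equiangular direction u = X_A * wA,
		// which has unit length and equal correlation AA with every active feature.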
		typename SGVector<ST>::EigenVectorXtMap map_corr_sign_a(corr_sign_a.vector, corr_sign_a.size());
		typename SGMatrix<ST>::EigenMatrixXtMap map_R(R.matrix, R.num_rows, R.num_cols);
		typename SGVector<ST>::EigenVectorXt solve = map_R.transpose().template triangularView<Lower>().template solve<OnTheLeft>(map_corr_sign_a);

		typename SGVector<ST>::EigenVectorXt GA1 = map_R.template triangularView<Upper>().template solve<OnTheLeft>(solve);

		// AA = 1/sqrt(GA1' * corr_sign_a)
		ST AA = GA1.dot(map_corr_sign_a);
		AA = 1/CMath::sqrt(AA);

		typename SGVector<ST>::EigenVectorXt wA = AA*GA1;

		// equiangular direction (unit vector)
		vector<ST> u(n_vec);
		typename SGVector<ST>::EigenVectorXtMap map_u(&u[0], n_vec);

		map_u = map_Xa*wA;

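		// Step length: gamma = max_corr/AA would drive the active correlations all
		// the way to zero. For every inactive feature, tmp1 and tmp2 are the step
		// lengths at which its correlation with the residual catches up with the
		// active set (for the two possible signs); gamma is the smallest positive
		// such value, i.e. the point where the next feature would join the model.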
		ST gamma = max_corr / AA;
		if (m_num_active < n_fea)
		{
			#pragma omp parallel for shared(gamma)
			for (index_t i=0; i < n_fea; ++i)
			{
				if (m_is_active[i])
					continue;

				// correlation between X[:,i] and u
				ST dir_corr = map_u.dot(map_X.col(i));

				ST tmp1 = (max_corr-corr[i])/(AA-dir_corr);
				ST tmp2 = (max_corr+corr[i])/(AA+dir_corr);
				#pragma omp critical
				{
					if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma)
						gamma = tmp1;
					if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma)
						gamma = tmp2;
				}
			}
		}

		int32_t i_kick=-1;
		int32_t i_change=i_max_corr;
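		// LASSO modification: find the smallest positive step at which an active
		// coefficient would change sign (beta + gamma*wA crossing zero). If that
		// happens before the next feature would enter, truncate the step to that
		// point and mark the offending variable to be dropped from the active set.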
		if (m_lasso)
		{
			// lasso modification to further refine gamma
			lasso_cond = false;
			ST lasso_bound = numeric_limits<ST>::max();

			for (index_t i=0; i < m_num_active; ++i)
			{
				ST tmp = -beta[m_active_set[i]] / wA(i);
				if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound)
				{
					lasso_bound = tmp;
					i_kick = i;
				}
			}

			if (lasso_bound < gamma)
			{
				gamma = lasso_bound;
				lasso_cond = true;
				i_change = m_active_set[i_kick];
			}
		}

		// update prediction: mu = mu + gamma * u
		map_mu += gamma*map_u;

		// update estimator
		for (index_t i=0; i < m_num_active; ++i)
			beta[m_active_set[i]] += gamma * wA(i);

		// early stopping on max l1-norm
		if (m_max_l1_norm > 0)
		{
			ST l1 = SGVector<ST>::onenorm(&beta[0], n_fea);
			if (l1 > m_max_l1_norm)
			{
				// stopping with interpolated beta
				stop_cond = true;
				lasso_cond = false;
				ST l1_prev = (ST) SGVector<ST>::onenorm(&m_beta_path_t[nloop][0], n_fea);
				ST s = (m_max_l1_norm-l1_prev)/(l1-l1_prev);

				typename SGVector<ST>::EigenVectorXtMap map_beta(&beta[0], n_fea);
				typename SGVector<ST>::EigenVectorXtMap map_beta_prev(&m_beta_path_t[nloop][0], n_fea);
				map_beta = (1-s)*map_beta_prev + s*map_beta;
			}
		}

		// if lasso cond, drop the variable from active set
		if (lasso_cond)
		{
			beta[i_change] = 0;
			R = cholesky_delete(R, i_kick);
			deactivate_variable(i_kick);

			// Remove column from active set
			int32_t numRows = map_Xa.rows();
			int32_t numCols = map_Xa.cols()-1;
			if (i_kick < numCols)
				map_Xa.block(0, i_kick, numRows, numCols-i_kick) =
					map_Xa.block(0, i_kick+1, numRows, numCols-i_kick).eval();
		}

		nloop++;
		m_beta_path_t.push_back(beta);
		if (size_t(m_num_active) >= m_beta_idx.size())
			m_beta_idx.push_back(nloop);
		else
			m_beta_idx[m_num_active] = nloop;

		// early stopping with max number of non-zero variables
		if (m_max_nonz > 0 && m_num_active >= m_max_nonz)
			stop_cond = true;
		SG_DEBUG("Added : %d , Dropped %d, Active set size %d max_corr %.17f \n", i_max_corr, i_kick, m_num_active, max_corr);
	}

	// copy m_beta_path_t (of type ST) into m_beta_path
	for (size_t i = 0; i < m_beta_path_t.size(); ++i)
	{
		std::vector<float64_t> va;
		for (size_t p = 0; p < m_beta_path_t[i].size(); ++p)
		{
			va.push_back((float64_t) m_beta_path_t[i][p]);
		}
		m_beta_path.push_back(va);
	}

	// assign default estimator
	w.vlen = n_fea;
	switch_w(m_beta_idx.size()-1);

	return true;
}

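// Rank-one update of the Cholesky factor R (R'R = X_A'X_A) when feature
// i_max_corr joins the active set: solve R' r_k = X_A' x_k for the new
// off-diagonal column and set the new diagonal entry to sqrt(x_k'x_k - r_k'r_k),
// so the factorization never has to be recomputed from scratch.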
template <typename ST>
SGMatrix<ST> CLeastAngleRegression::cholesky_insert(const SGMatrix<ST>& X,
	const SGMatrix<ST>& X_active, SGMatrix<ST>& R, int32_t i_max_corr, int32_t num_active)
{
	typename SGMatrix<ST>::EigenMatrixXtMap map_X(X.matrix, X.num_rows, X.num_cols);
	typename SGMatrix<ST>::EigenMatrixXtMap map_X_active(X_active.matrix, X.num_rows, num_active);
	ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));

	// col_k is the k-th column of (X'X)
	typename SGVector<ST>::EigenVectorXtMap map_i_max(X.get_column_vector(i_max_corr), X.num_rows);
	typename SGVector<ST>::EigenVectorXt R_k = map_X_active.transpose()*map_i_max;
	typename SGMatrix<ST>::EigenMatrixXtMap map_R(R.matrix, R.num_rows, R.num_cols);

	// R' * R_k = (X' * X)_k = col_k, solving to get R_k
	map_R.transpose().template triangularView<Lower>().template solveInPlace<OnTheLeft>(R_k);
	ST R_kk = CMath::sqrt(diag_k - R_k.dot(R_k));

	SGMatrix<ST> R_new(num_active+1, num_active+1);
	typename SGMatrix<ST>::EigenMatrixXtMap map_R_new(R_new.matrix, R_new.num_rows, R_new.num_cols);

	map_R_new.block(0, 0, num_active, num_active) = map_R;
	memcpy(R_new.matrix+num_active*(num_active+1), R_k.data(), sizeof(ST)*(num_active));
	map_R_new.row(num_active).setZero();
	map_R_new(num_active, num_active) = R_kk;
	return R_new;
}

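// Downdate the Cholesky factor after variable i_kick leaves the active set:
// delete column i_kick of R, apply Givens rotations (plane_rot) to zero the
// sub-diagonal entries so the matrix is upper triangular again, and return
// the leading (m_num_active-1) x (m_num_active-1) block.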
template <typename ST>
SGMatrix<ST> CLeastAngleRegression::cholesky_delete(SGMatrix<ST>& R, int32_t i_kick)
{
	if (i_kick != m_num_active-1)
	{
		// remove i_kick-th column
		for (index_t j=i_kick; j < m_num_active-1; ++j)
			for (index_t i=0; i < m_num_active; ++i)
				R(i,j) = R(i,j+1);

		SGMatrix<ST> G(2,2);
		for (index_t i=i_kick; i < m_num_active-1; ++i)
		{
			plane_rot(R(i,i), R(i+1,i), R(i,i), R(i+1,i), G);
			if (i < m_num_active-2)
			{
				for (index_t k=i+1; k < m_num_active-1; ++k)
				{
					// R[i:i+1, k] = G*R[i:i+1, k]
					ST Rik = R(i,k), Ri1k = R(i+1,k);
					R(i,k) = G(0,0)*Rik + G(0,1)*Ri1k;
					R(i+1,k) = G(1,0)*Rik + G(1,1)*Ri1k;
				}
			}
		}
	}

	SGMatrix<ST> nR(m_num_active-1, m_num_active-1);
	for (index_t i=0; i < m_num_active-1; ++i)
		for (index_t j=0; j < m_num_active-1; ++j)
			nR(i,j) = R(i,j);

	return nR;
}

template bool CLeastAngleRegression::train_machine_templated<float64_t>(CDenseFeatures<float64_t> * data);
template bool CLeastAngleRegression::train_machine_templated<float32_t>(CDenseFeatures<float32_t> * data);
template bool CLeastAngleRegression::train_machine_templated<floatmax_t>(CDenseFeatures<floatmax_t> * data);
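
For context, a minimal usage sketch of how this class is typically driven from user C++ code. It assumes the standard Shogun 4.x API (init_shogun_with_defaults(), CDenseFeatures, CRegressionLabels, CMachine::train(), CLinearMachine::get_w(), CMachine::apply_regression()); the data values and variable names are illustrative only, and reference-count cleanup is abbreviated.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/regression/LeastAngleRegression.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// toy data: 3 features x 4 training vectors (column-major, one column per vector)
	SGMatrix<float64_t> feat_mat(3, 4);
	SGVector<float64_t> lab_vec(4);
	for (index_t i = 0; i < 4; ++i)
	{
		feat_mat(0, i) = i;
		feat_mat(1, i) = 2*i;
		feat_mat(2, i) = -1.0*i;
		lab_vec[i] = 3*i;
	}

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(feat_mat);
	CRegressionLabels* labels = new CRegressionLabels(lab_vec);

	// true -> LASSO variant, false -> plain LARS
	CLeastAngleRegression* lars = new CLeastAngleRegression(true);
	lars->set_labels(labels);
	lars->train(features); // dispatches into train_machine_templated<float64_t>

	// estimator for the last point of the regularization path
	SGVector<float64_t> w = lars->get_w();
	CRegressionLabels* preds = lars->apply_regression(features);

	SG_UNREF(preds);
	SG_UNREF(lars);
	exit_shogun();
	return 0;
}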