SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LMNNImpl.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Fernando J. Iglesias Garcia
8  * Copyright (C) 2013 Fernando J. Iglesias Garcia
9  */
10 
11 #ifdef HAVE_EIGEN3
12 #ifdef HAVE_LAPACK
13 
14 #include <shogun/metric/LMNNImpl.h>
15 #include <shogun/multiclass/KNN.h>
18 
19 #include <iterator>
20 
22 
23 // column-wise sum of the squared elements of a matrix
24 #define SUMSQCOLS(A) ((A).array().square().colwise().sum())
25 
26 using namespace shogun;
27 using namespace Eigen;
28 
29 CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp)
30 : example(ex), target(tar), impostor(imp)
31 {
32 }
33 
34 bool CImpostorNode::operator<(const CImpostorNode& rhs) const
35 {
36  if (example == rhs.example)
37  {
38  if (target == rhs.target)
39  return impostor < rhs.impostor;
40  else
41  return target < rhs.target;
42  }
43  else
44  return example < rhs.example;
45 }
46 
47 void CLMNNImpl::check_training_setup(CFeatures* features, const CLabels* labels,
48  SGMatrix<float64_t>& init_transform)
49 {
50  REQUIRE(features->has_property(FP_DOT),
51  "LMNN can only be applied to features that support dot products\n")
53  "LMNN supports only MulticlassLabels\n")
54  REQUIRE(labels->get_num_labels()==features->get_num_vectors(),
55  "The number of feature vectors must be equal to the number of labels\n")
56  //FIXME this requirement should be dropped in the future
57  REQUIRE(features->get_feature_class()==C_DENSE,
58  "Currently, LMNN supports only DenseFeatures\n")
59 
60  // cast is safe, we ensure above that features are dense
61  CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(features);
62 
64  if (init_transform.num_rows==0)
65  init_transform = CLMNNImpl::compute_pca_transform(x);
66 
67  REQUIRE(init_transform.num_rows==x->get_num_features() &&
68  init_transform.num_rows==init_transform.num_cols,
69  "The initial transform must be a square matrix of size equal to the "
70  "number of features\n")
71 }
72 
73 SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x,
74  CMulticlassLabels* y, int32_t k)
75 {
76  SG_SDEBUG("Entering CLMNNImpl::find_target_nn().\n")
77 
78  // get the number of features
79  int32_t d = x->get_num_features();
80  SGMatrix<index_t> target_neighbors(k, x->get_num_vectors());
81  SGVector<float64_t> unique_labels = y->get_unique_labels();
82  CDenseFeatures<float64_t>* features_slice = new CDenseFeatures<float64_t>();
83  CMulticlassLabels* labels_slice = new CMulticlassLabels();
84  SGVector<index_t> idxsmap(x->get_num_vectors());
85 
86  // increase ref count because a KNN instance will be created for each slice, and it
87  // decreses the ref count of its labels and features when destroyed
88  SG_REF(features_slice)
89  SG_REF(labels_slice)
90 
91  for (index_t i = 0; i < unique_labels.vlen; ++i)
92  {
93  int32_t slice_size = 0;
94  for (int32_t j = 0; j < y->get_num_labels(); ++j)
95  {
96  if (y->get_label(j)==unique_labels[i])
97  {
98  idxsmap[slice_size] = j;
99  ++slice_size;
100  }
101  }
102 
103  MatrixXd slice_mat(d, slice_size);
104  for (int32_t j = 0; j < slice_size; ++j)
105  slice_mat.col(j) = Map<const VectorXd>(x->get_feature_vector(idxsmap[j]).vector, d);
106 
107  features_slice->set_feature_matrix(SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false));
108 
109  //FIXME the labels are not actually necessary to get the nearest neighbors, the
110  //features suffice. The labels are needed when we want to classify.
111  SGVector<float64_t> labels_vec(slice_size);
112  labels_vec.set_const(unique_labels[i]);
113  labels_slice->set_labels(labels_vec);
114 
115  CKNN* knn = new CKNN(k+1, new CEuclideanDistance(features_slice, features_slice), labels_slice);
116  SGMatrix<int32_t> target_slice = knn->nearest_neighbors();
117  // sanity check
118  ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size)
119 
120  for (index_t j = 0; j < target_slice.num_cols; ++j)
121  {
122  for (index_t l = 1; l < target_slice.num_rows; ++l)
123  target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
124  }
125 
126  // clean up knn
127  SG_UNREF(knn)
128  }
129 
130  // clean up features and labels
131  SG_UNREF(features_slice)
132  SG_UNREF(labels_slice)
133 
134  SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")
135 
136  return target_neighbors;
137 }
138 
139 MatrixXd CLMNNImpl::sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn)
140 {
141  // get the number of features
142  int32_t d = x->get_num_features();
143  // initialize the sum of outer products (sop)
144  MatrixXd sop(d,d);
145  sop.setZero();
146  // map the feature matrix (each column is a feature vector) to an Eigen matrix
147  Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors());
148 
149  // sum the outer products stored in C using the indices specified in target_nn
150  for (index_t i = 0; i < target_nn.num_cols; ++i)
151  {
152  for (index_t j = 0; j < target_nn.num_rows; ++j)
153  {
154  VectorXd dx = X.col(i) - X.col(target_nn(j,i));
155  sop += dx*dx.transpose();
156  }
157  }
158 
159  return sop;
160 }
161 
162 ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
163  CMulticlassLabels* y, const MatrixXd& L, const SGMatrix<index_t> target_nn,
164  const uint32_t iter, const uint32_t correction)
165 {
166  SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n")
167 
168  // get the number of examples from data
169  int32_t n = x->get_num_vectors();
170  // get the number of features
171  int32_t d = x->get_num_features();
172  // get the number of neighbors
173  int32_t k = target_nn.num_rows;
174 
175  // map the feature matrix (each column is a feature vector) to an Eigen matrix
176  Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n);
177  // transform the feature vectors
178  MatrixXd LX = L*X;
179 
180  // compute square distances plus margin from examples to target neighbors
181  MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
182 
183  // initialize impostors set
184  ImpostorsSetType N;
185  // exact impostors set shared between calls to this method
186  static ImpostorsSetType Nexact;
187 
188  // impostors search
189  REQUIRE(correction>0, "The number of iterations between exact updates of the "
190  "impostors set must be greater than 0\n")
191  if ((iter % correction)==0)
192  {
193  Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
194  N = Nexact;
195  }
196  else
197  {
198  N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
199  }
200 
201  SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n")
202 
203  return N;
204 }
205 
206 void CLMNNImpl::update_gradient(CDenseFeatures<float64_t>* x, MatrixXd& G,
207  const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
208 {
209  // compute the difference sets
210  ImpostorsSetType Np_Nc, Nc_Np;
211  set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
212  set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
213 
214  // map the feature matrix (each column is a feature vector) to an Eigen matrix
215  Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
216 
217  // remove the gradient contributions of the impostors that were in the previous
218  // set but disappeared in the current
219  for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
220  {
221  VectorXd dx1 = X.col(it->example) - X.col(it->target);
222  VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
223  G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
224  }
225 
226  // add the gradient contributions of the new impostors
227  for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
228  {
229  VectorXd dx1 = X.col(it->example) - X.col(it->target);
230  VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
231  G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
232  }
233 }
234 
235 void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal)
236 {
237  if (diagonal)
238  {
239  // compute M as the square of L
240  MatrixXd M = L.transpose()*L;
241  // do step in M along the gradient direction
242  M -= stepsize*G;
243  // keep only the elements in the diagonal of M
244  VectorXd m = M.diagonal();
245 
246  VectorXd zero;
247  zero.resize(m.size());
248  zero.setZero();
249 
250  // return to representation in L
251  VectorXd l = m.array().max(zero.array()).array().sqrt();
252  L = l.asDiagonal();
253  }
254  else
255  {
256  // do step in L along the gradient direction (no need to project M then)
257  L -= stepsize*(2*L*G);
258  }
259 }
260 
261 void CLMNNImpl::correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter)
262 {
263  if (iter > 0)
264  {
265  // Difference between current and previous objective
266  float64_t delta = obj[iter] - obj[iter-1];
267 
268  if (delta > 0)
269  {
270  // The objective has increased, we have probably jumped over the optimum,
271  // thus, decrease the step size
272  stepsize *= 0.5;
273  }
274  else
275  {
276  // The objective has decreased, we are in the right direction,
277  // increase the step size
278  stepsize *= 1.01;
279  }
280  }
281 }
282 
283 bool CLMNNImpl::check_termination(float64_t stepsize, const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
284 {
285  if (iter >= maxiter-1)
286  {
287  SG_SWARNING("Maximum number of iterations reached before convergence.");
288  return true;
289  }
290 
291  if (stepsize < stepsize_threshold)
292  {
293  SG_SDEBUG("Step size too small to make more progress. Convergence reached.\n");
294  return true;
295  }
296 
297  if (iter >= 10)
298  {
299  for (int32_t i = 0; i < 3; ++i)
300  {
301  if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)
302  return false;
303  }
304 
305  SG_SDEBUG("No more progress in the objective. Convergence reached.\n");
306  return true;
307  }
308 
309  // For the rest of the cases, do not stop LMNN.
310  return false;
311 }
312 
313 SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>* features)
314 {
315  SG_SDEBUG("Initializing LMNN transform using PCA.\n");
316 
317  // Substract the mean of the features
318  // Create clone of the features to keep the input features unmodified
319  CDenseFeatures<float64_t>* cloned_features =
321  CPruneVarSubMean* mean_substractor =
322  new CPruneVarSubMean(false); // false to avoid variance normalization
323  mean_substractor->init(cloned_features);
324  mean_substractor->apply_to_feature_matrix(cloned_features);
325 
326  // Obtain the linear transform applying PCA
327  CPCA* pca = new CPCA();
328  pca->set_target_dim(cloned_features->get_num_features());
329  pca->init(cloned_features);
330  SGMatrix<float64_t> pca_transform = pca->get_transformation_matrix();
331 
332  SG_UNREF(pca);
333  SG_UNREF(mean_substractor);
334  SG_UNREF(cloned_features);
335 
336  return pca_transform;
337 }
338 
339 MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target_nn)
340 {
341  // get the number of examples
342  ASSERT(LX.cols()==target_nn.num_cols)
343  int32_t n = LX.cols();
344  // get the number of features
345  int32_t d = LX.rows();
346  // get the number of neighbors
347  int32_t k = target_nn.num_rows;
348 
350 
351  // create Shogun features from LX to later apply subset
352  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
354 
355  // initialize distances
356  MatrixXd sqdists(k,n);
357  sqdists.setZero();
358  for (int32_t i = 0; i < k; ++i)
359  {
360  //FIXME avoid copying the rows of target_nn and access them directly. Maybe
361  //find_target_nn should be changed to return the output transposed wrt how it is
362  //done atm.
363  SGVector<index_t> subset_vec = target_nn.get_row_vector(i);
364  lx->add_subset(subset_vec);
365  // after the subset, there are still n columns, i.e. the subset is used to
366  // modify the order of the columns in x according to the target neighbors
367  sqdists.row(i) = SUMSQCOLS(LX - Map<const MatrixXd>(lx->get_feature_matrix().matrix, d, n)) + 1;
368  lx->remove_subset();
369  }
370 
371  // clean up features used to apply subset
372  SG_UNREF(lx);
373 
374  return sqdists;
375 }
376 
377 ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists,
378  CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k)
379 {
380  SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n")
381 
382  // initialize empty impostors set
383  ImpostorsSetType N = ImpostorsSetType();
384 
385  // get the number of examples from data
386  int32_t n = LX.cols();
387  // get the number of features
388  int32_t d = LX.rows();
389  // create Shogun features from LX to later apply subset
390  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
391  CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
392 
393  // get a vector with unique label values
394  SGVector<float64_t> unique = y->get_unique_labels();
395 
396  // for each label except from the largest one
397  for (index_t i = 0; i < unique.vlen-1; ++i)
398  {
399  // get the indices of the examples labelled as unique[i]
400  std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]);
401  // get the indices of the examples that have a larger label value, so that
402  // pairwise distances are computed once
403  std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]);
404 
405  // get distance with features indexed by iidxs and gtidxs separated
406  CEuclideanDistance* euclidean = CLMNNImpl::setup_distance(lx,iidxs,gtidxs);
407  euclidean->set_disable_sqrt(true);
408 
409  for (int32_t j = 0; j < k; ++j)
410  {
411  for (std::size_t ii = 0; ii < iidxs.size(); ++ii)
412  {
413  for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)
414  {
415  // FIXME study if using upper bounded distances can be an improvement
416  float64_t distance = euclidean->distance(ii,jj);
417 
418  if (distance <= sqdists(j,iidxs[ii]))
419  N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );
420 
421  if (distance <= sqdists(j,gtidxs[jj]))
422  N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
423  }
424  }
425  }
426 
427  SG_UNREF(euclidean);
428  }
429 
430  SG_UNREF(lx);
431 
432  SG_SDEBUG("Leaving CLMNNImpl::find_impostors_exact().\n")
433 
434  return N;
435 }
436 
437 ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
438  const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn)
439 {
440  SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n")
441 
442  // initialize empty impostors set
443  ImpostorsSetType N = ImpostorsSetType();
444 
445  // compute square distances from examples to impostors
446  SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact);
447 
448  // find in the exact set of impostors computed last, the triplets that remain impostors
449  index_t i = 0;
450  for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
451  {
452  // find in target_nn(:,it->example) the position of the target neighbor it->target
453  index_t target_idx = 0;
454  while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows)
455  ++target_idx;
456 
457  REQUIRE(target_idx<target_nn.num_rows, "The index of the target neighbour in the "
458  "impostors set was not found in the target neighbours matrix. "
459  "There must be a bug in find_impostors_exact.\n")
460 
461  if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
462  N.insert(*it);
463  }
464 
465  SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
466 
467  return N;
468 }
469 
470 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
471 {
472  // get the number of examples
473  int32_t n = LX.cols();
474  // get the number of features
475  int32_t d = LX.rows();
476  // get the number of impostors
477  size_t num_impostors = Nexact.size();
478 
480 
481  // create Shogun features from LX and distance
482  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
484  CEuclideanDistance* euclidean = new CEuclideanDistance(lx,lx);
485  euclidean->set_disable_sqrt(true);
486 
487  // initialize vector of square distances
488  SGVector<float64_t> sqdists(num_impostors);
489  // compute square distances
490  index_t i = 0;
491  for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
492  sqdists[i++] = euclidean->distance(it->example,it->impostor);
493 
494  // clean up distance
495  SG_UNREF(euclidean);
496 
497  return sqdists;
498 }
499 
500 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y,
501  float64_t yi)
502 {
503  // indices of the examples with label equal to yi
504  std::vector<index_t> idxs;
505 
506  for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
507  {
508  if (y->get_label(i) == yi)
509  idxs.push_back(i);
510  }
511 
512  return idxs;
513 }
514 
515 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y,
516  float64_t yi)
517 {
518  // indices of the examples with label equal greater than yi
519  std::vector<index_t> idxs;
520 
521  for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
522  {
523  if (y->get_label(i) > yi)
524  idxs.push_back(i);
525  }
526 
527  return idxs;
528 }
529 
530 CEuclideanDistance* CLMNNImpl::setup_distance(CDenseFeatures<float64_t>* x,
531  std::vector<index_t>& a, std::vector<index_t>& b)
532 {
533  // create new features only containing examples whose indices are in a
534  x->add_subset(SGVector<index_t>(a.data(), a.size(), false));
536  x->remove_subset();
537 
538  // create new features only containing examples whose indices are in b
539  x->add_subset(SGVector<index_t>(b.data(), b.size(), false));
541  x->remove_subset();
542 
543  // create and return distance
544  CEuclideanDistance* euclidean = new CEuclideanDistance(afeats,bfeats);
545  return euclidean;
546 }
547 
548 #endif /* HAVE_LAPACK */
549 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation