SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LMNN.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Fernando J. Iglesias Garcia
8  * Copyright (C) 2013 Fernando J. Iglesias Garcia
9  */
10 
11 #ifdef HAVE_EIGEN3
12 #ifdef HAVE_LAPACK
13 
14 #include <shogun/metric/LMNN.h>
15 #include <shogun/metric/LMNNImpl.h>
17 
19 
20 // trace of the product of two matrices computed fast using trace(A*B)=sum(A.*B')
21 #define TRACE(A,B) (((A).array()*(B).transpose().array()).sum())
22 
23 using namespace shogun;
24 using namespace Eigen;
25 
// NOTE(review): the signature line was lost in extraction — presumably the
// default constructor CLMNN::CLMNN(); confirm against LMNN.h.
27 {
 // Register parameters and set default member values.
28  init();
29 
 // Statistics object is owned by this instance (ref-counted).
30  m_statistics = new CLMNNStatistics();
31  SG_REF(m_statistics);
32 }
33 
// NOTE(review): the signature line was lost in extraction — presumably
// CLMNN::CLMNN(CFeatures* features, CMulticlassLabels* labels, int32_t k);
// confirm against LMNN.h.
35 {
36  init();
37 
 // Store the training data and the number of target neighbours.
38  m_features = features;
39  m_labels = labels;
40  m_k = k;
41 
 // Take a reference on the externally supplied objects; released in the destructor.
42  SG_REF(m_features)
43  SG_REF(m_labels)
44 
 // Statistics object is owned by this instance (ref-counted).
45  m_statistics = new CLMNNStatistics();
46  SG_REF(m_statistics);
47 }
48 
// NOTE(review): the signature line was lost in extraction — presumably the
// destructor CLMNN::~CLMNN(); confirm against LMNN.h.
50 {
 // Release the references taken in the constructors.
51  SG_UNREF(m_features)
52  SG_UNREF(m_labels)
53  SG_UNREF(m_statistics);
54 }
55 
56 const char* CLMNN::get_name() const
57 {
58  return "LMNN";
59 }
60 
// Train the LMNN metric: projected (sub-)gradient descent on the linear
// transform L, where the learned Mahalanobis matrix is M = L^T*L.
//
// @param init_transform initial value for L; validated (and, presumably,
//        defaulted when empty — TODO confirm) by check_training_setup.
//
// NOTE(review): one listing line was lost in extraction between the setup
// check and the cast below — it presumably declared `y` (the multiclass
// labels view of m_labels) used by find_target_nn/find_impostors; confirm
// against the repository copy of this file.
61 void CLMNN::train(SGMatrix<float64_t> init_transform)
62 {
63  SG_DEBUG("Entering CLMNN::train().\n")
64 
65 
 // Validate features/labels/initial transform before any work is done.
66  CLMNNImpl::check_training_setup(m_features, m_labels, init_transform);
67 
69 
70  // cast is safe, check_training_setup ensures features are dense
71  CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features);
73  SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());
74 
75  // Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
 // Map wraps the SGMatrix buffer without copying it.
76  MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
77  init_transform.num_cols);
78  // Compute target or genuine neighbours
79  SG_DEBUG("Finding target nearest neighbors.\n")
80  SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
81  // Initialize (sub-)gradient
82  SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
 // The pull-term of the gradient (target neighbours) is constant across
 // iterations; only the impostor (push) term changes inside the loop.
83  MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
84  // Value of the objective function at every iteration
85  SGVector<float64_t> obj(m_maxiter);
86  // The step size is modified depending on how the objective changes, leave the
87  // step size member unchanged and use a local one
88  float64_t stepsize = m_stepsize;
89  // Last active set of impostors computed exactly, current and previous impostors sets
90  ImpostorsSetType exact_impostors, cur_impostors, prev_impostors;
91  // Iteration counter
92  uint32_t iter = 0;
93  // Criterion for termination
94  bool stop = false;
95  // Make space for the training statistics
96  m_statistics->resize(m_maxiter);
97 
 // Main (sub-)gradient descent loop; terminates on convergence or m_maxiter.
99  while (!stop)
100  {
101  SG_PROGRESS(iter, 0, m_maxiter)
102 
103  // Find current set of impostors
 // m_correction controls how often the impostor set is recomputed exactly.
104  SG_DEBUG("Finding impostors.\n")
105  cur_impostors = CLMNNImpl::find_impostors(x,y,L,target_nn,iter,m_correction);
106  SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size())
107 
108  // (Sub-) gradient computation
 // Incremental update: only the difference between the current and the
 // previous impostor sets is applied to the gradient.
109  SG_DEBUG("Updating gradient.\n")
110  CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization);
111  // Take gradient step
112  SG_DEBUG("Taking gradient step.\n")
113  CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal);
114 
115  // Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
116  // plus the number of current impostors to account for the margin
117  SG_DEBUG("Computing objective.\n")
118  obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size();
119 
120  // Correct step size
 // Step size adaptation: grown/shrunk depending on whether the objective
 // decreased, as decided by correct_stepsize.
121  CLMNNImpl::correct_stepsize(stepsize, obj, iter);
122 
123  // Check termination criterion
124  stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter, m_stepsize_threshold, m_obj_threshold);
125 
126  // Update iteration counter
127  iter = iter + 1;
128  // Update previous set of impostors
129  prev_impostors = cur_impostors;
130 
131  // Store statistics for this iteration
 // iter was already incremented, hence the iter-1 indexing here and below.
132  m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size());
133 
134  SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n",
135  iter, obj[iter-1], cur_impostors.size(), stepsize)
136  }
137 
138  // Truncate statistics in case convergence was reached in less than maxiter
139  m_statistics->resize(iter);
 // Copy the converged L out of the Eigen matrix into the member SGMatrix.
142  int32_t nfeats = x->get_num_features();
143  float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
144  m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);
145 
146  SG_DEBUG("Leaving CLMNN::train().\n")
147 }
148 
// NOTE(review): the signature line was lost in extraction — presumably
// SGMatrix<float64_t> CLMNN::get_linear_transform() const; confirm against LMNN.h.
// Returns the linear transform L learned by train() (shallow SGMatrix copy).
150 {
151  return m_linear_transform;
152 }
153 
// NOTE(review): the signature line was lost in extraction — presumably
// CCustomMahalanobisDistance* CLMNN::get_distance() const; confirm against
// LMNN.h. Builds a Mahalanobis distance with matrix M = L^T*L over the
// training features. The returned object carries a reference for the caller.
155 {
156  // Compute Mahalanobis distance matrix M = L^T*L
157 
158  // Put the linear transform L in Eigen to perform the matrix multiplication
159  // L is not copied to another region of memory
160  Map<const MatrixXd> map_linear_transform(m_linear_transform.matrix,
161  m_linear_transform.num_rows, m_linear_transform.num_cols);
162  // TODO exploit that M is symmetric
163  MatrixXd M = map_linear_transform.transpose()*map_linear_transform;
164  // TODO avoid copying
 // Element-wise copy of the Eigen result into a Shogun matrix.
165  SGMatrix<float64_t> mahalanobis_matrix(M.rows(), M.cols());
166  for (index_t i = 0; i < M.rows(); i++)
167  for (index_t j = 0; j < M.cols(); j++)
168  mahalanobis_matrix(i,j) = M(i,j);
169 
170  // Create custom Mahalanobis distance with matrix M associated with the training features
 // NOTE(review): a listing line was lost here — presumably the declaration
 // "CCustomMahalanobisDistance* distance =" for the new-expression below.
173  new CCustomMahalanobisDistance(m_features, m_features, mahalanobis_matrix);
174  SG_REF(distance)
175 
176  return distance;
177 }
178 
179 int32_t CLMNN::get_k() const
180 {
181  return m_k;
182 }
183 
184 void CLMNN::set_k(const int32_t k)
185 {
186  REQUIRE(k>0, "The number of target neighbors per example must be larger than zero\n");
187  m_k = k;
188 }
189 
// NOTE(review): the signature line was lost in extraction — presumably
// float64_t CLMNN::get_regularization() const; confirm against LMNN.h.
// Returns the trade-off between the pull and push terms of the objective.
191 {
192  return m_regularization;
193 }
194 
195 void CLMNN::set_regularization(const float64_t regularization)
196 {
197  m_regularization = regularization;
198 }
199 
// NOTE(review): the signature line was lost in extraction — presumably
// float64_t CLMNN::get_stepsize() const; confirm against LMNN.h.
// Returns the initial gradient-descent step size used by train().
201 {
202  return m_stepsize;
203 }
204 
205 void CLMNN::set_stepsize(const float64_t stepsize)
206 {
207  REQUIRE(stepsize>0, "The step size used in gradient descent must be larger than zero\n")
208  m_stepsize = stepsize;
209 }
210 
// NOTE(review): the signature line was lost in extraction — presumably
// float64_t CLMNN::get_stepsize_threshold() const; confirm against LMNN.h.
// Returns the step-size threshold used as a termination criterion in train().
212 {
213  return m_stepsize_threshold;
214 }
215 
216 void CLMNN::set_stepsize_threshold(const float64_t stepsize_threshold)
217 {
218  REQUIRE(stepsize_threshold>0,
219  "The threshold for the step size must be larger than zero\n")
220  m_stepsize_threshold = stepsize_threshold;
221 }
222 
223 uint32_t CLMNN::get_maxiter() const
224 {
225  return m_maxiter;
226 }
227 
228 void CLMNN::set_maxiter(const uint32_t maxiter)
229 {
230  REQUIRE(maxiter>0, "The number of maximum iterations must be larger than zero\n")
231  m_maxiter = maxiter;
232 }
233 
234 uint32_t CLMNN::get_correction() const
235 {
236  return m_correction;
237 }
238 
239 void CLMNN::set_correction(const uint32_t correction)
240 {
241  m_correction = correction;
242 }
243 
// NOTE(review): the signature line was lost in extraction — presumably
// float64_t CLMNN::get_obj_threshold() const; confirm against LMNN.h.
// Returns the objective-change threshold used as a termination criterion.
245 {
246  return m_obj_threshold;
247 }
248 
249 void CLMNN::set_obj_threshold(const float64_t obj_threshold)
250 {
251  REQUIRE(obj_threshold>0,
252  "The threshold for the objective must be larger than zero\n")
253  m_obj_threshold = obj_threshold;
254 }
255 
// NOTE(review): the signature line was lost in extraction — presumably
// bool CLMNN::get_diagonal() const; confirm against LMNN.h.
// Returns whether a diagonal-only transform is learned (see gradient_step).
257 {
258  return m_diagonal;
259 }
260 
261 void CLMNN::set_diagonal(const bool diagonal)
262 {
263  m_diagonal = diagonal;
264 }
265 
// NOTE(review): the signature line was lost in extraction — presumably
// CLMNNStatistics* CLMNN::get_statistics() const; confirm against LMNN.h.
// The returned pointer carries an extra reference for the caller (SG_REF),
// so the caller is responsible for SG_UNREF.
267 {
268  SG_REF(m_statistics);
269  return m_statistics;
270 }
271 
// Register model parameters with Shogun's parameter framework and set
// default member values. Called from every constructor.
//
// NOTE(review): several SG_ADD continuation lines (the ones carrying the
// MS_* availability argument) were lost in extraction — the calls below that
// appear unterminated are truncated listing artifacts, not the original code;
// confirm against the repository copy.
272 void CLMNN::init()
273 {
274  SG_ADD(&m_linear_transform, "linear_transform",
275  "Linear transform in matrix form", MS_NOT_AVAILABLE)
276  SG_ADD((CSGObject**) &m_features, "features", "Training features",
278  SG_ADD((CSGObject**) &m_labels, "labels", "Training labels",
280  SG_ADD(&m_k, "k", "Number of target neighbours per example",
282  SG_ADD(&m_regularization, "regularization", "Regularization",
283  MS_AVAILABLE)
284  SG_ADD(&m_stepsize, "stepsize", "Step size in gradient descent",
286  SG_ADD(&m_stepsize_threshold, "stepsize_threshold", "Step size threshold",
288  SG_ADD(&m_maxiter, "maxiter", "Maximum number of iterations",
290  SG_ADD(&m_correction, "correction",
291  "Iterations between exact impostors search", MS_NOT_AVAILABLE)
292  SG_ADD(&m_obj_threshold, "obj_threshold", "Objective threshold",
 // NOTE(review): parameter name "m_diagonal" breaks the convention used by
 // every other registration here (no m_ prefix, e.g. "k", "stepsize");
 // renaming would break serialized-model compatibility, so only flagging it.
294  SG_ADD(&m_diagonal, "m_diagonal", "Diagonal transformation", MS_NOT_AVAILABLE);
295  SG_ADD((CSGObject**) &m_statistics, "statistics", "Training statistics",
296  MS_NOT_AVAILABLE);
297 
 // Default values for all members.
298  m_features = NULL;
299  m_labels = NULL;
300  m_k = 1;
301  m_regularization = 0.5;
302  m_stepsize = 1e-07;
303  m_stepsize_threshold = 1e-22;
304  m_maxiter = 1000;
305  m_correction = 15;
306  m_obj_threshold = 1e-9;
307  m_diagonal = false;
308  m_statistics = NULL;
309 }
310 
// NOTE(review): the signature line was lost in extraction — presumably the
// default constructor CLMNNStatistics::CLMNNStatistics(); confirm against LMNN.h.
312 {
 // Register the statistics vectors with the parameter framework.
313  init();
314 }
315 
// NOTE(review): the signature line was lost in extraction — presumably the
// destructor CLMNNStatistics::~CLMNNStatistics(); confirm against LMNN.h.
// Empty body: the SGVector members clean up after themselves.
317 {
318 }
319 
320 const char* CLMNNStatistics::get_name() const
321 {
322  return "LMNNStatistics";
323 }
324 
325 void CLMNNStatistics::resize(int32_t size)
326 {
327  REQUIRE(size > 0, "The new size in CLMNNStatistics::resize must be larger than zero."
328  " Given value is %d.\n", size);
329 
330  obj.resize_vector(size);
331  stepsize.resize_vector(size);
332  num_impostors.resize_vector(size);
333 }
334 
335 void CLMNNStatistics::set(index_t iter, float64_t obj_iter, float64_t stepsize_iter,
336  uint32_t num_impostors_iter)
337 {
338  REQUIRE(iter >= 0 && iter < obj.vlen, "The iteration index in CLMNNStatistics::set "
339  "must be larger or equal to zero and less than the size (%d). Given valu is %d.\n", obj.vlen, iter);
340 
341  obj[iter] = obj_iter;
342  stepsize[iter] = stepsize_iter;
343  num_impostors[iter] = num_impostors_iter;
344 }
345 
// Register the per-iteration statistics vectors with Shogun's parameter
// framework so they are serialized with the object.
//
// NOTE(review): the continuation line closing the last SG_ADD (presumably
// "MS_NOT_AVAILABLE);") was lost in extraction; confirm against the
// repository copy.
346 void CLMNNStatistics::init()
347 {
348  SG_ADD(&obj, "obj", "Objective at each iteration", MS_NOT_AVAILABLE);
349  SG_ADD(&stepsize, "stepsize", "Step size at each iteration", MS_NOT_AVAILABLE);
350  SG_ADD(&num_impostors, "num_impostors", "Number of impostors at each iteration",
352 }
353 
354 #endif /* HAVE_LAPACK */
355 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation