SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
LMNN.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Fernando J. Iglesias Garcia
8  * Copyright (C) 2013 Fernando J. Iglesias Garcia
9  */
10 
11 #include <shogun/metric/LMNN.h>
12 
13 
14 #include <shogun/metric/LMNNImpl.h>
16 
17 // useful shorthand to perform operations with Eigen matrices
18 // trace of the product of two matrices computed fast using trace(A*B)=sum(A.*B')
19 #define TRACE(A,B) (((A).array()*(B).transpose().array()).sum())
20 
21 using namespace shogun;
22 using namespace Eigen;
23 
25 {
26  init();
27 
28  m_statistics = new CLMNNStatistics();
29  SG_REF(m_statistics);
30 }
31 
33 {
34  init();
35 
36  m_features = features;
37  m_labels = labels;
38  m_k = k;
39 
40  SG_REF(m_features)
41  SG_REF(m_labels)
42 
43  m_statistics = new CLMNNStatistics();
44  SG_REF(m_statistics);
45 }
46 
48 {
49  SG_UNREF(m_features)
50  SG_UNREF(m_labels)
51  SG_UNREF(m_statistics);
52 }
53 
54 const char* CLMNN::get_name() const
55 {
56  return "LMNN";
57 }
58 
59 void CLMNN::train(SGMatrix<float64_t> init_transform)
60 {
61  SG_DEBUG("Entering CLMNN::train().\n")
62 
63  // Check training data and arguments, initializing, if necessary, init_transform
64  CLMNNImpl::check_training_setup(m_features, m_labels, init_transform);
65 
66  // Initializations
67 
68  // cast is safe, check_training_setup ensures features are dense
69  CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features);
71  SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());
72 
73  // Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
74  MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
75  init_transform.num_cols);
76  // Compute target or genuine neighbours
77  SG_DEBUG("Finding target nearest neighbors.\n")
78  SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
79  // Initialize (sub-)gradient
80  SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
81  MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
82  // Value of the objective function at every iteration
83  SGVector<float64_t> obj(m_maxiter);
84  // The step size is modified depending on how the objective changes, leave the
85  // step size member unchanged and use a local one
86  float64_t stepsize = m_stepsize;
87  // Last active set of impostors computed exactly, current and previous impostors sets
88  ImpostorsSetType exact_impostors, cur_impostors, prev_impostors;
89  // Iteration counter
90  uint32_t iter = 0;
91  // Criterion for termination
92  bool stop = false;
93  // Make space for the training statistics
94  m_statistics->resize(m_maxiter);
95 
96  // Main loop
97  while (!stop)
98  {
99  SG_PROGRESS(iter, 0, m_maxiter)
100 
101  // Find current set of impostors
102  SG_DEBUG("Finding impostors.\n")
103  cur_impostors = CLMNNImpl::find_impostors(x,y,L,target_nn,iter,m_correction);
104  SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size())
105 
106  // (Sub-) gradient computation
107  SG_DEBUG("Updating gradient.\n")
108  CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization);
109  // Take gradient step
110  SG_DEBUG("Taking gradient step.\n")
111  CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal);
112 
113  // Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
114  // plus the number of current impostors to account for the margin
115  SG_DEBUG("Computing objective.\n")
116  obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size();
117 
118  // Correct step size
119  CLMNNImpl::correct_stepsize(stepsize, obj, iter);
120 
121  // Check termination criterion
122  stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter, m_stepsize_threshold, m_obj_threshold);
123 
124  // Update iteration counter
125  iter = iter + 1;
126  // Update previous set of impostors
127  prev_impostors = cur_impostors;
128 
129  // Store statistics for this iteration
130  m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size());
131 
132  SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n",
133  iter, obj[iter-1], cur_impostors.size(), stepsize)
134  }
135 
136  // Truncate statistics in case convergence was reached in less than maxiter
137  m_statistics->resize(iter);
138 
139  // Store the transformation found in the class attribute
140  int32_t nfeats = x->get_num_features();
141  float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
142  m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);
143 
144  SG_DEBUG("Leaving CLMNN::train().\n")
145 }
146 
148 {
149  return m_linear_transform;
150 }
151 
153 {
154  // Compute Mahalanobis distance matrix M = L^T*L
155 
156  // Put the linear transform L in Eigen to perform the matrix multiplication
157  // L is not copied to another region of memory
158  Map<const MatrixXd> map_linear_transform(m_linear_transform.matrix,
159  m_linear_transform.num_rows, m_linear_transform.num_cols);
160  // TODO exploit that M is symmetric
161  MatrixXd M = map_linear_transform.transpose()*map_linear_transform;
162  // TODO avoid copying
163  SGMatrix<float64_t> mahalanobis_matrix(M.rows(), M.cols());
164  for (index_t i = 0; i < M.rows(); i++)
165  for (index_t j = 0; j < M.cols(); j++)
166  mahalanobis_matrix(i,j) = M(i,j);
167 
168  // Create custom Mahalanobis distance with matrix M associated with the training features
169 
171  new CCustomMahalanobisDistance(m_features, m_features, mahalanobis_matrix);
172  SG_REF(distance)
173 
174  return distance;
175 }
176 
177 int32_t CLMNN::get_k() const
178 {
179  return m_k;
180 }
181 
182 void CLMNN::set_k(const int32_t k)
183 {
184  REQUIRE(k>0, "The number of target neighbors per example must be larger than zero\n");
185  m_k = k;
186 }
187 
189 {
190  return m_regularization;
191 }
192 
193 void CLMNN::set_regularization(const float64_t regularization)
194 {
195  m_regularization = regularization;
196 }
197 
199 {
200  return m_stepsize;
201 }
202 
203 void CLMNN::set_stepsize(const float64_t stepsize)
204 {
205  REQUIRE(stepsize>0, "The step size used in gradient descent must be larger than zero\n")
206  m_stepsize = stepsize;
207 }
208 
210 {
211  return m_stepsize_threshold;
212 }
213 
214 void CLMNN::set_stepsize_threshold(const float64_t stepsize_threshold)
215 {
216  REQUIRE(stepsize_threshold>0,
217  "The threshold for the step size must be larger than zero\n")
218  m_stepsize_threshold = stepsize_threshold;
219 }
220 
221 uint32_t CLMNN::get_maxiter() const
222 {
223  return m_maxiter;
224 }
225 
226 void CLMNN::set_maxiter(const uint32_t maxiter)
227 {
228  REQUIRE(maxiter>0, "The number of maximum iterations must be larger than zero\n")
229  m_maxiter = maxiter;
230 }
231 
232 uint32_t CLMNN::get_correction() const
233 {
234  return m_correction;
235 }
236 
237 void CLMNN::set_correction(const uint32_t correction)
238 {
239  m_correction = correction;
240 }
241 
243 {
244  return m_obj_threshold;
245 }
246 
247 void CLMNN::set_obj_threshold(const float64_t obj_threshold)
248 {
249  REQUIRE(obj_threshold>0,
250  "The threshold for the objective must be larger than zero\n")
251  m_obj_threshold = obj_threshold;
252 }
253 
255 {
256  return m_diagonal;
257 }
258 
259 void CLMNN::set_diagonal(const bool diagonal)
260 {
261  m_diagonal = diagonal;
262 }
263 
265 {
266  SG_REF(m_statistics);
267  return m_statistics;
268 }
269 
270 void CLMNN::init()
271 {
272  SG_ADD(&m_linear_transform, "linear_transform",
273  "Linear transform in matrix form", MS_NOT_AVAILABLE)
274  SG_ADD((CSGObject**) &m_features, "features", "Training features",
276  SG_ADD((CSGObject**) &m_labels, "labels", "Training labels",
278  SG_ADD(&m_k, "k", "Number of target neighbours per example",
280  SG_ADD(&m_regularization, "regularization", "Regularization",
281  MS_AVAILABLE)
282  SG_ADD(&m_stepsize, "stepsize", "Step size in gradient descent",
284  SG_ADD(&m_stepsize_threshold, "stepsize_threshold", "Step size threshold",
286  SG_ADD(&m_maxiter, "maxiter", "Maximum number of iterations",
288  SG_ADD(&m_correction, "correction",
289  "Iterations between exact impostors search", MS_NOT_AVAILABLE)
290  SG_ADD(&m_obj_threshold, "obj_threshold", "Objective threshold",
292  SG_ADD(&m_diagonal, "m_diagonal", "Diagonal transformation", MS_NOT_AVAILABLE);
293  SG_ADD((CSGObject**) &m_statistics, "statistics", "Training statistics",
294  MS_NOT_AVAILABLE);
295 
296  m_features = NULL;
297  m_labels = NULL;
298  m_k = 1;
299  m_regularization = 0.5;
300  m_stepsize = 1e-07;
301  m_stepsize_threshold = 1e-22;
302  m_maxiter = 1000;
303  m_correction = 15;
304  m_obj_threshold = 1e-9;
305  m_diagonal = false;
306  m_statistics = NULL;
307 }
308 
310 {
311  init();
312 }
313 
315 {
316 }
317 
318 const char* CLMNNStatistics::get_name() const
319 {
320  return "LMNNStatistics";
321 }
322 
323 void CLMNNStatistics::resize(int32_t size)
324 {
325  REQUIRE(size > 0, "The new size in CLMNNStatistics::resize must be larger than zero."
326  " Given value is %d.\n", size);
327 
328  obj.resize_vector(size);
329  stepsize.resize_vector(size);
330  num_impostors.resize_vector(size);
331 }
332 
333 void CLMNNStatistics::set(index_t iter, float64_t obj_iter, float64_t stepsize_iter,
334  uint32_t num_impostors_iter)
335 {
336  REQUIRE(iter >= 0 && iter < obj.vlen, "The iteration index in CLMNNStatistics::set "
337  "must be larger or equal to zero and less than the size (%d). Given valu is %d.\n", obj.vlen, iter);
338 
339  obj[iter] = obj_iter;
340  stepsize[iter] = stepsize_iter;
341  num_impostors[iter] = num_impostors_iter;
342 }
343 
344 void CLMNNStatistics::init()
345 {
346  SG_ADD(&obj, "obj", "Objective at each iteration", MS_NOT_AVAILABLE);
347  SG_ADD(&stepsize, "stepsize", "Step size at each iteration", MS_NOT_AVAILABLE);
348  SG_ADD(&num_impostors, "num_impostors", "Number of impostors at each iteration",
350 }
351 
CCustomMahalanobisDistance * get_distance() const
Definition: LMNN.cpp:152
float distance(CJLCoverTreePoint p1, CJLCoverTreePoint p2, float64_t upper_bound)
#define TRACE(A, B)
Definition: LMNN.cpp:19
virtual const char * get_name() const
Definition: LMNN.cpp:54
void set_regularization(const float64_t regularization)
Definition: LMNN.cpp:193
void set_diagonal(const bool diagonal)
Definition: LMNN.cpp:259
virtual const char * get_name() const
Definition: LMNN.cpp:318
float64_t get_obj_threshold() const
Definition: LMNN.cpp:242
bool get_diagonal() const
Definition: LMNN.cpp:254
int32_t get_num_features() const
int32_t index_t
Definition: common.h:62
#define SG_PROGRESS(...)
Definition: SGIO.h:142
float64_t get_stepsize_threshold() const
Definition: LMNN.cpp:209
float64_t get_stepsize() const
Definition: LMNN.cpp:198
Definition: SGMatrix.h:20
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:376
Class LMNNStatistics used to give access to intermediate results obtained training LMNN...
Definition: LMNN.h:249
#define SG_REF(x)
Definition: SGObject.h:54
index_t num_rows
Definition: SGMatrix.h:374
float64_t get_regularization() const
Definition: LMNN.cpp:188
void set(index_t iter, float64_t obj_iter, float64_t stepsize_iter, uint32_t num_impostors_iter)
Definition: LMNN.cpp:333
static T * clone_matrix(const T *matrix, int32_t nrows, int32_t ncols)
Definition: SGMatrix.cpp:263
Multiclass Labels for multi-class classification.
int32_t size() const
Definition: SGVector.h:113
index_t vlen
Definition: SGVector.h:494
uint32_t get_correction() const
Definition: LMNN.cpp:232
void set_maxiter(const uint32_t maxiter)
Definition: LMNN.cpp:226
void set_stepsize_threshold(const float64_t stepsize_threshold)
Definition: LMNN.cpp:214
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
virtual int32_t get_num_vectors() const
CLMNNStatistics * get_statistics() const
Definition: LMNN.cpp:264
double float64_t
Definition: common.h:50
void set_k(const int32_t k)
Definition: LMNN.cpp:182
Class CustomMahalanobisDistance used to compute the distance between feature vectors and as ...
virtual ~CLMNNStatistics()
Definition: LMNN.cpp:314
void resize(int32_t size)
Definition: LMNN.cpp:323
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
SGMatrix< float64_t > get_linear_transform() const
Definition: LMNN.cpp:147
void set_obj_threshold(const float64_t obj_threshold)
Definition: LMNN.cpp:247
void train(SGMatrix< float64_t > init_transform=SGMatrix< float64_t >())
Definition: LMNN.cpp:59
void set_correction(const uint32_t correction)
Definition: LMNN.cpp:237
uint32_t get_maxiter() const
Definition: LMNN.cpp:221
virtual ~CLMNN()
Definition: LMNN.cpp:47
void set_stepsize(const float64_t stepsize)
Definition: LMNN.cpp:203
void resize_vector(int32_t n)
Definition: SGVector.cpp:257
#define SG_ADD(...)
Definition: SGObject.h:84
int32_t get_k() const
Definition: LMNN.cpp:177
static CMulticlassLabels * to_multiclass(CLabels *base_labels)

SHOGUN Machine Learning Toolbox - Documentation