24 #define SUMSQCOLS(A) ((A).array().square().colwise().sum())
27 using namespace Eigen;
30 : example(ex), target(tar), impostor(imp)
34 bool CImpostorNode::operator<(
const CImpostorNode& rhs)
const
36 if (example == rhs.example)
38 if (target == rhs.target)
39 return impostor < rhs.impostor;
41 return target < rhs.target;
44 return example < rhs.example;
47 void CLMNNImpl::check_training_setup(
CFeatures* features,
const CLabels* labels,
51 "LMNN can only be applied to features that support dot products\n")
53 "LMNN supports only MulticlassLabels\n")
55 "The number of feature vectors must be equal to the number of labels\n")
58 "Currently, LMNN supports only DenseFeatures\n")
65 init_transform = CLMNNImpl::compute_pca_transform(x);
69 "The initial transform must be a square matrix of size equal to the "
70 "number of features\n")
76 SG_SDEBUG(
"Entering CLMNNImpl::find_target_nn().\n")
79 int32_t d = x->get_num_features();
91 for (
index_t i = 0; i < unique_labels.vlen; ++i)
93 int32_t slice_size = 0;
98 idxsmap[slice_size] = j;
104 for (int32_t j = 0; j < slice_size; ++j)
107 features_slice->set_feature_matrix(
SGMatrix<float64_t>(slice_mat.data(), d, slice_size,
false));
112 labels_vec.set_const(unique_labels[i]);
113 labels_slice->set_labels(labels_vec);
120 for (
index_t j = 0; j < target_slice.num_cols; ++j)
123 target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
134 SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")
136 return target_neighbors;
142 int32_t d = x->get_num_features();
150 for (
index_t i = 0; i < target_nn.num_cols; ++i)
152 for (
index_t j = 0; j < target_nn.num_rows; ++j)
154 VectorXd dx = X.col(i) - X.col(target_nn(j,i));
155 sop += dx*dx.transpose();
164 const uint32_t iter,
const uint32_t correction)
166 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors().\n")
169 int32_t n = x->get_num_vectors();
171 int32_t d = x->get_num_features();
173 int32_t k = target_nn.num_rows;
176 Map<const
MatrixXd> X(x->get_feature_matrix().matrix, d, n);
181 MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
186 static ImpostorsSetType Nexact;
189 REQUIRE(correction>0, "The number of iterations between exact updates of the "
190 "impostors set must be greater than 0\n")
191 if ((iter % correction)==0)
193 Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
198 N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
201 SG_SDEBUG(
"Leaving CLMNNImpl::find_impostors().\n")
207 const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
210 ImpostorsSetType Np_Nc, Nc_Np;
211 set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
212 set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
215 Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
219 for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
221 VectorXd dx1 = X.col(it->example) - X.col(it->target);
222 VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
223 G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
227 for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
229 VectorXd dx1 = X.col(it->example) - X.col(it->target);
230 VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
231 G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
235 void CLMNNImpl::gradient_step(
MatrixXd& L,
const MatrixXd& G, float64_t stepsize,
bool diagonal)
244 VectorXd m = M.diagonal();
247 zero.resize(m.size());
251 VectorXd l = m.array().max(zero.array()).array().sqrt();
257 L -= stepsize*(2*L*G);
261 void CLMNNImpl::correct_stepsize(float64_t& stepsize,
const SGVector<float64_t> obj,
const uint32_t iter)
266 float64_t
delta = obj[iter] - obj[iter-1];
283 bool CLMNNImpl::check_termination(float64_t stepsize,
const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
285 if (iter >= maxiter-1)
287 SG_SWARNING(
"Maximum number of iterations reached before convergence.");
291 if (stepsize < stepsize_threshold)
293 SG_SDEBUG(
"Step size too small to make more progress. Convergence reached.\n");
299 for (int32_t i = 0; i < 3; ++i)
301 if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)
305 SG_SDEBUG(
"No more progress in the objective. Convergence reached.\n");
315 SG_SDEBUG(
"Initializing LMNN transform using PCA.\n");
323 mean_substractor->init(cloned_features);
324 mean_substractor->apply_to_feature_matrix(cloned_features);
329 pca->
init(cloned_features);
336 return pca_transform;
343 int32_t n = LX.cols();
345 int32_t d = LX.rows();
358 for (int32_t i = 0; i < k; ++i)
364 lx->add_subset(subset_vec);
377 ImpostorsSetType CLMNNImpl::find_impostors_exact(
MatrixXd& LX,
const MatrixXd& sqdists,
380 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors_exact().\n")
383 ImpostorsSetType N = ImpostorsSetType();
386 int32_t n = LX.cols();
388 int32_t d = LX.rows();
390 SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
394 SGVector<float64_t> unique = y->get_unique_labels();
397 for (
index_t i = 0; i < unique.vlen-1; ++i)
400 std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]);
403 std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]);
409 for (int32_t j = 0; j < k; ++j)
411 for (std::size_t ii = 0; ii < iidxs.size(); ++ii)
413 for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)
418 if (distance <= sqdists(j,iidxs[ii]))
419 N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );
421 if (distance <= sqdists(j,gtidxs[jj]))
422 N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
432 SG_SDEBUG(
"Leaving CLMNNImpl::find_impostors_exact().\n")
437 ImpostorsSetType CLMNNImpl::find_impostors_approx(
MatrixXd& LX, const
MatrixXd& sqdists,
440 SG_SDEBUG(
"Entering CLMNNImpl::find_impostors_approx().\n")
443 ImpostorsSetType N = ImpostorsSetType();
446 SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact);
450 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
454 while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows)
457 REQUIRE(target_idx<target_nn.num_rows,
"The index of the target neighbour in the "
458 "impostors set was not found in the target neighbours matrix. "
459 "There must be a bug in find_impostors_exact.\n")
461 if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
465 SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
470 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(
MatrixXd& LX, const ImpostorsSetType& Nexact)
473 int32_t n = LX.cols();
475 int32_t d = LX.rows();
477 size_t num_impostors = Nexact.
size();
485 euclidean->set_disable_sqrt(
true);
491 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
492 sqdists[i++] = euclidean->distance(it->example,it->impostor);
500 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y,
504 std::vector<index_t> idxs;
515 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y,
519 std::vector<index_t> idxs;
531 std::vector<index_t>& a, std::vector<index_t>& b)
float distance(CJLCoverTreePoint p1, CJLCoverTreePoint p2, float64_t upper_bound)
virtual ELabelType get_label_type() const =0
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
virtual int32_t get_num_labels() const
The class Labels models labels, i.e. class assignments of objects.
virtual int32_t get_num_labels() const =0
multi-class labels 0,1,...
SGMatrix< ST > get_feature_matrix()
virtual int32_t get_num_vectors() const =0
float64_t get_label(int32_t idx)
SGMatrix< index_t > nearest_neighbors()
Preprocessor PruneVarSubMean will subtract the mean and remove features that have zero variance...
Multiclass Labels for multi-class classification.
virtual void set_disable_sqrt(bool state)
#define SUMSQCOLS(A)
useful shorthands to perform operations with Eigen matrices
void set_target_dim(int32_t dim)
virtual EFeatureClass get_feature_class() const =0
Class KNN, an implementation of the standard k-nearest neighbor classifier.
virtual bool init(CFeatures *features)
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
All classes and functions are contained in the shogun namespace
virtual void remove_subset()
The class Features is the base class of all feature objects.
Preprocessor PCA performs principal component analysis on input feature vectors/matrices. When the init method in PCA is called with proper feature matrix X (with say N number of vectors and D feature dimension), a transformation matrix is computed and stored internally. This transformation matrix is then used to transform all D-dimensional feature vectors or feature matrices (with D feature dimensions) supplied via apply_to_feature_matrix or apply_to_feature_vector methods. This transformation outputs the T-dimensional approximation of all these input vectors and matrices (where T<=min(D,N)). The transformation matrix is essentially a DxT matrix, the columns of which correspond to the eigenvectors of the covariance matrix (XX') having top T eigenvalues.
SGVector< T > get_row_vector(index_t row) const
bool has_property(EFeatureProperty p) const
virtual void add_subset(SGVector< index_t > subset)
SGMatrix< float64_t > get_transformation_matrix()