#define SUMSQCOLS(A)    ((A).array().square().colwise().sum())

using namespace Eigen;
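// Hypothetical usage sketch (not part of the original file): SUMSQCOLS maps each column
// of an Eigen matrix to the sum of its squared entries, i.e. the squared Euclidean norm
// of that column.
//
//   MatrixXd A(2,3);
//   A << 1, 0, 2,
//        2, 3, 0;
//   ArrayXXd colsq = SUMSQCOLS(A);   // 1x3: [5, 9, 4]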
 
CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp)
    : example(ex), target(tar), impostor(imp)
 
bool CImpostorNode::operator<(const CImpostorNode& rhs) const
{
    if (example == rhs.example)
    {
        if (target == rhs.target)
            return impostor < rhs.impostor;
        else
            return target < rhs.target;
    }
    else
        return example < rhs.example;
}
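// Note: the comparator above defines a lexicographic order on the (example, target,
// impostor) triplet. ImpostorsSetType is assumed to be an ordered set (std::set) of
// CImpostorNode, so this strict weak ordering is what keeps impostor triplets unique
// and sorted; inserting the same triplet twice leaves a single element in the set.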
 
void CLMNNImpl::check_training_setup(CFeatures* features, const CLabels* labels,
        SGMatrix<float64_t>& init_transform)
{
    REQUIRE(features->has_property(FP_DOT),
            "LMNN can only be applied to features that support dot products\n")
    REQUIRE(labels->get_label_type()==LT_MULTICLASS,
            "LMNN supports only MulticlassLabels\n")
    REQUIRE(labels->get_num_labels()==features->get_num_vectors(),
            "The number of feature vectors must be equal to the number of labels\n")
    REQUIRE(features->get_feature_class()==C_DENSE,
            "Currently, LMNN supports only DenseFeatures\n")
 
        init_transform = CLMNNImpl::compute_pca_transform(x);
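    // If the caller did not supply an initial transform (presumably the guard above,
    // not shown in this excerpt, checks for an empty matrix), LMNN falls back to a
    // PCA-based initialization computed by compute_pca_transform() below.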
 
    REQUIRE(init_transform.num_rows==init_transform.num_cols &&
            init_transform.num_rows==x->get_num_features(),
            "The initial transform must be a square matrix of size equal to the "
            "number of features\n")
 
    SG_SDEBUG("Entering CLMNNImpl::find_target_nn().\n")
 
    int32_t d = x->get_num_features();
 
    for (index_t i = 0; i < unique_labels.vlen; ++i)
 
        int32_t slice_size = 0;
 
                idxsmap[slice_size] = j;
 
        for (int32_t j = 0; j < slice_size; ++j)
 
        features_slice->set_feature_matrix(
                SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false));
 
        labels_vec.set_const(unique_labels[i]);
        labels_slice->set_labels(labels_vec);
 
        for (index_t j = 0; j < target_slice.num_cols; ++j)
 
                target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
 
    SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")

    return target_neighbors;
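// Note on find_target_nn: for every class, the examples carrying that label are sliced
// out, a nearest-neighbor search is run within the slice, and idxsmap translates the
// slice-local neighbor indices reported in target_slice back to indices into the full
// feature matrix. Target neighbors therefore always share the label of their example,
// as LMNN requires.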
 
    int32_t d = x->get_num_features();
 
    for (index_t i = 0; i < target_nn.num_cols; ++i)
    {
        for (index_t j = 0; j < target_nn.num_rows; ++j)
        {
            VectorXd dx = X.col(i) - X.col(target_nn(j,i));
            sop += dx*dx.transpose();
        }
    }
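    // Note: this double loop accumulates the constant "pull" term of the LMNN gradient,
    //   sop = sum_i sum_{j in targets(i)} (x_i - x_j)(x_i - x_j)^T,
    // the sum of outer products of the differences between every example and each of
    // its target neighbors. It does not depend on the current transform, so it only
    // needs to be computed once up front.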
 
ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x, CMulticlassLabels* y,
        const MatrixXd& L, const SGMatrix<index_t>& target_nn,
        const uint32_t iter, const uint32_t correction)
 
    SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n")
 
    int32_t n = x->get_num_vectors();
    int32_t d = x->get_num_features();
    int32_t k = target_nn.num_rows;
 
    Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n);
 
    MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX, target_nn);
 
    static ImpostorsSetType Nexact;
 
    REQUIRE(correction>0, "The number of iterations between exact updates of the "
            "impostors set must be greater than 0\n")
 
    if ((iter % correction)==0)
    {
        Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
        N = Nexact;
    }
    else
    {
        N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
    }
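    // Note: a full impostor search over all pairs of differently-labelled examples is
    // only performed every `correction` iterations; the iterations in between reuse the
    // last exact set (Nexact) and merely discard triplets that no longer violate the
    // margin, which is considerably cheaper.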
 
    SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n")
 
        const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
 
    ImpostorsSetType Np_Nc, Nc_Np;
    set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
    set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
 
    Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
 
    for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
    {
        VectorXd dx1 = X.col(it->example) - X.col(it->target);
        VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
        G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
    }
 
    for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
    {
        VectorXd dx1 = X.col(it->example) - X.col(it->target);
        VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
        G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
    }
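    // Note: rather than recomputing the gradient from scratch, only the triplets that
    // left the impostor set (Np \ Nc, first loop) and those that entered it (Nc \ Np,
    // second loop) contribute here, subtracting and adding their outer-product terms
    // respectively, so G carries the accumulated gradient across iterations.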
 
void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal)
 
        VectorXd m = M.diagonal();

        zero.resize(m.size());

        VectorXd l = m.array().max(zero.array()).array().sqrt();
 
        L -= stepsize*(2*L*G);
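        // Note: with the metric parameterized as M = L^T L, the chain rule gives
        // dF/dL = 2 L (dF/dM) for a symmetric gradient dF/dM = G, which is where the
        // factor 2*L*G in this full-matrix branch comes from. The diagonal branch above
        // instead clips the diagonal of M at zero and takes square roots, so L remains
        // a valid diagonal transform.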
 
void CLMNNImpl::correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter)
 
        float64_t delta = obj[iter] - obj[iter-1];
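        // Note: delta is the change of the objective in the last iteration. The usual
        // LMNN schedule (the exact factors are not shown in this excerpt) shrinks the
        // step size when delta > 0 (the objective got worse) and enlarges it slightly
        // when the objective decreased.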
 
bool CLMNNImpl::check_termination(float64_t stepsize, const SGVector<float64_t> obj,
        uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
 
    if (iter >= maxiter-1)
    {
        SG_SWARNING("Maximum number of iterations reached before convergence.");
        return true;
    }
 
    if (stepsize < stepsize_threshold)
    {
        SG_SDEBUG("Step size too small to make more progress. Convergence reached.\n");
        return true;
    }
 
        for (int32_t i = 0; i < 3; ++i)

            if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)

        SG_SDEBUG("No more progress in the objective. Convergence reached.\n");
 
    SG_SDEBUG("Initializing LMNN transform using PCA.\n");
 
    mean_substractor->init(cloned_features);
    mean_substractor->apply_to_feature_matrix(cloned_features);
 
    pca->init(cloned_features);
 
    return pca_transform;
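// Note on compute_pca_transform: the input features are cloned, mean-centered via the
// PruneVarSubMean preprocessor, and PCA is initialized on the centered data; the
// resulting transformation matrix is returned and used as the initial linear transform
// for LMNN.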
 
    int32_t n = LX.cols();
    int32_t d = LX.rows();
 
    for (int32_t i = 0; i < k; ++i)
 
        lx->add_subset(subset_vec);
 
ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists,
        CMulticlassLabels* y, const SGMatrix<index_t>& target_nn, int32_t k)
 
    SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n")
 
    ImpostorsSetType N = ImpostorsSetType();
 
    int32_t n = LX.cols();
    int32_t d = LX.rows();
    SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
 
    SGVector<float64_t> unique = y->get_unique_labels();
 
    for (index_t i = 0; i < unique.vlen-1; ++i)
 
        std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y, unique[i]);
        std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y, unique[i]);
 
        for (int32_t j = 0; j < k; ++j)
 
            for (std::size_t ii = 0; ii < iidxs.size(); ++ii)

                for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)

                    if (distance <= sqdists(j,iidxs[ii]))
                        N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );

                    if (distance <= sqdists(j,gtidxs[jj]))
                        N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
 
    SG_SDEBUG("Leaving CLMNNImpl::find_impostors_exact().\n")
 
ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
        const ImpostorsSetType& Nexact, const SGMatrix<index_t>& target_nn)
 
    SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n")
 
    ImpostorsSetType N = ImpostorsSetType();
 
    SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX, Nexact);
 
    for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
 
        // check the bound before indexing into target_nn
        while (target_idx < target_nn.num_rows && target_nn(target_idx, it->example) != it->target)
 
        REQUIRE(target_idx<target_nn.num_rows, "The index of the target neighbour in the "
                "impostors set was not found in the target neighbours matrix. "
                "There must be a bug in find_impostors_exact.\n")
 
        if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
 
    SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
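// Note on find_impostors_approx: instead of searching all pairs again, the last exact
// impostor set Nexact is filtered; each stored triplet is kept only if, under the
// current transform, the example-impostor squared distance still does not exceed the
// example's reference distance to the corresponding target neighbor.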
 
SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
 
    int32_t n = LX.cols();
    int32_t d = LX.rows();
    size_t num_impostors = Nexact.size();
 
    euclidean->set_disable_sqrt(true);
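    // Note: with the square root disabled, CEuclideanDistance::distance() returns squared
    // Euclidean distances, which is what the LMNN margin comparisons operate on.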
 
    for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
        sqdists[i++] = euclidean->distance(it->example, it->impostor);
 
std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y, float64_t yi)
 
    std::vector<index_t> idxs;
 
std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y, float64_t yi)
 
    std::vector<index_t> idxs;
 
        std::vector<index_t>& a, std::vector<index_t>& b)
 