26 #include <shogun/lib/external/falconn/lsh_nn_table.h>
32 using namespace Eigen;
93 SG_ERROR(
"Number of training vectors does not match number of labels\n")
151 SG_PRINT(
"\nQuick sort query %d\n", i)
152 for (int32_t j=0; j<
m_k; j++)
158 for (int32_t j=0; j<
m_k; j++)
159 NN(j,i) = train_idxs[j];
184 ASSERT(m_k<=distance->get_num_vec_lhs())
189 int32_t* train_lab=SG_MALLOC(int32_t,
m_k);
191 SG_INFO(
"%d test examples\n", num_lab)
208 for (int32_t j=0; j<
m_k; j++)
212 int32_t out_idx = choose_class(classes, train_lab);
225 SG_INFO(
"q != 1.0 not supported with cover tree, using q = 1\n")
249 for ( int32_t i = 0 ; i < res.
index ; ++i )
251 for ( int32_t j = 0 ; j < res[i].
index ; ++j )
253 printf(
"%d ", res[i][j].m_index);
260 for ( int32_t i = 0 ; i < res.
index ; ++i )
263 for ( int32_t j = 0; j <
m_k; ++j )
268 int32_t out_idx = choose_class(classes, train_lab);
289 for (int32_t j=0; j<
m_k; j++)
293 int32_t out_idx = choose_class(classes, train_lab);
304 std::vector<falconn::DenseVector<double>> feats;
310 falconn::DenseVector<double> temp =
Map<VectorXd> (vec, len);
311 feats.push_back(temp);
314 falconn::LSHConstructionParameters params
315 = falconn::get_default_parameters<falconn::DenseVector<double>>(features->
get_num_vectors(),
317 falconn::DistanceFunction::EuclideanSquared,
320 if (m_lsh_l && m_lsh_t)
323 auto lsh_table = falconn::construct_table<falconn::DenseVector<double>>(feats, params);
325 lsh_table->set_num_probes(m_lsh_t);
328 std::vector<falconn::DenseVector<double>> query_feats;
336 falconn::DenseVector<double> temp =
Map<VectorXd> (vec, len);
337 auto indices =
new std::vector<int32_t> ();
338 lsh_table->find_k_nearest_neighbors(temp, (int_fast64_t)
m_k, indices);
339 memcpy(NN.get_column_vector(i), indices->data(),
sizeof(int32_t)*m_k);
346 for (int32_t j=0; j<
m_k; j++)
350 int32_t out_idx = choose_class(classes, train_lab);
377 SG_INFO(
"%d test examples\n", num_lab)
398 if (distances[j]<min_dist)
400 min_dist = distances[j];
424 int32_t* output=SG_MALLOC(int32_t,
m_k*num_lab);
427 int32_t* train_lab=SG_MALLOC(int32_t,
m_k);
432 SG_INFO(
"%d test examples\n", num_lab)
462 for ( int32_t i = 0 ; i < res.
index ; ++i )
467 for ( int32_t j = 0 ; j <
m_k ; ++j )
473 res[i][j+1].m_index ];
479 choose_class_for_multiple_k(output+res[i][0].m_index, classes,
502 for (int32_t j=0; j<
m_k; j++)
509 choose_class_for_multiple_k(output+i, classes, train_lab, num_lab);
521 for (int32_t j=0; j<
m_k; j++)
524 choose_class_for_multiple_k(output+i, classes, train_lab, num_lab);
545 SG_ERROR(
"No vectors on left hand side\n")
577 int32_t CKNN::choose_class(
float64_t* classes, int32_t* train_lab)
582 for (int32_t j=0; j<
m_k; j++)
584 classes[train_lab[j]]+= multiplier;
585 multiplier*= multiplier;
594 if (out_max< classes[j])
604 void CKNN::choose_class_for_multiple_k(int32_t* output, int32_t* classes, int32_t* train_lab, int32_t step)
607 memset(classes, 0,
sizeof(int32_t)*m_num_classes);
609 for (int32_t j=0; j<
m_k; j++)
611 classes[train_lab[j]]++;
619 if (out_max< classes[c])
virtual void store_model_features()
virtual bool save(FILE *dstfile)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
Class Distance, a base class for all the distances used in the Shogun toolbox.
int32_t get_num_features() const
virtual void reset_precompute()
void init_distance(CFeatures *data)
The class Labels models labels, i.e. class assignments of objects.
SGMatrix< index_t > get_knn_indices()
virtual int32_t get_num_labels() const =0
static void qsort_index(T1 *output, T2 *index, uint32_t size)
node< P > batch_create(v_array< P > points)
virtual CFeatures * replace_lhs(CFeatures *lhs)
SGMatrix< int32_t > classify_for_multiple_k()
Class v_array taken directly from JL's implementation.
virtual int32_t get_num_vectors() const =0
void distances_lhs(float64_t *result, int32_t idx_a1, int32_t idx_a2, int32_t idx_b)
virtual CFeatures * duplicate() const =0
int32_t m_min_label
smallest label, i.e. -1
virtual bool train_machine(CFeatures *data=NULL)
SGMatrix< index_t > nearest_neighbors()
void build_tree(CDenseFeatures< float64_t > *data)
A generic DistanceMachine interface.
bool set_label(int32_t idx, float64_t label)
virtual bool load(FILE *srcfile)
v_array< CJLCoverTreePoint > parse_points(CDistance *distance, EFeaturesContainer fc)
int32_t m_num_classes
number of classes (i.e. number of values labels can take)
Multiclass Labels for multi-class classification.
int32_t m_k
the k parameter in KNN
void query_knn(CDenseFeatures< float64_t > *data, int32_t k)
virtual void set_store_model_features(bool store_model)
virtual int32_t get_num_vectors() const
static void clear_cancel()
virtual int32_t get_num_vec_rhs()
This class implements KD-Tree. cf. http://www.autonlab.org/autonweb/14665/version/2/part/5/data/moore...
static bool cancel_computations()
virtual CFeatures * replace_rhs(CFeatures *rhs)
float64_t m_q
parameter q of rank weighting
SGVector< int32_t > m_train_labels
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
All classes and functions are contained in the shogun namespace.
The class Features is the base class of all feature objects.
void set_distance(CDistance *d)
virtual void precompute_lhs()
SGVector< T > clone() const
virtual CMulticlassLabels * classify_NN()
virtual CMulticlassLabels * apply_multiclass(CFeatures *data=NULL)
void k_nearest_neighbor(const node< P > &top_node, const node< P > &query, v_array< v_array< P > > &results, int k)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
virtual void set_labels(CLabels *lab)