SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
NbodyTree.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Parijat Mazumdar
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 
32 #ifndef _NBODYTREE_H__
33 #define _NBODYTREE_H__
34 
35 #include <shogun/lib/config.h>
36 
38 #include <shogun/kernel/Kernel.h>
43 
44 namespace shogun
45 {
46 
50 class CNbodyTree : public CTreeMachine<NbodyTreeNodeData>
51 {
52 public:
53 
59  CNbodyTree(int32_t leaf_size=1, EDistanceType d=D_EUCLIDEAN);
60 
62  virtual ~CNbodyTree() { };
63 
67  virtual const char* get_name() const { return "NbodyTree"; }
68 
73 
79 
85  void query_knn(CDenseFeatures<float64_t>* data, int32_t k);
86 
97 
110 
116 
122 
123 protected:
131  virtual float64_t min_dist(bnode_t* node,float64_t* feat, int32_t dim)=0;
132 
139  virtual float64_t min_dist_dual(bnode_t* nodeq, bnode_t* noder)=0;
140 
147  virtual float64_t max_dist_dual(bnode_t* nodeq, bnode_t* noder)=0;
148 
155  virtual void init_node(bnode_t* node, index_t start, index_t end)=0;
156 
165  virtual void min_max_dist(float64_t* pt, bnode_t* node, float64_t &lower,float64_t &upper, int32_t dim)=0;
166 
173  {
174  if (m_dist==D_MANHATTAN)
175  return dists;
176 
177  return CMath::sqrt(dists);
178  }
179 
187  float64_t distance(index_t vec, float64_t* arr, int32_t dim);
188 
195  {
196  if (m_dist==D_EUCLIDEAN)
197  return d*d;
198  else if (m_dist==D_MANHATTAN)
199  return CMath::abs(d);
200  else
201  SG_ERROR("distance metric not recognized\n");
202 
203  return 0;
204  }
205 
206 private:
207 
216  void query_knn_single(CKNNHeap* heap, float64_t min_dist, bnode_t* node, float64_t* arr, int32_t dim);
217 
232  void get_kde_single(bnode_t* node,float64_t* data, EKernelType kernel, float64_t h, float64_t log_atol, float64_t log_rtol,
233  float64_t log_norm, float64_t min_bound_node, float64_t spread_node, float64_t &min_bound_global, float64_t &spread_global);
234 
252  void kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t> qid, SGMatrix<float64_t> qdata, SGVector<float64_t> log_density,
253  EKernelType kernel_type, float64_t h, float64_t log_atol, float64_t log_rtol, float64_t log_norm, float64_t min_bound_node,
254  float64_t spread_node, float64_t &min_bound_global, float64_t &spread_global);
255 
262  CBinaryTreeMachineNode<NbodyTreeNodeData>* recursive_build(index_t start, index_t end);
263 
271  void partition(index_t dim, index_t start, index_t end, index_t mid);
272 
278  index_t find_split_dim(bnode_t* node);
279 
286  inline float64_t logsumexp(float64_t x, float64_t y)
287  {
288  float64_t a=CMath::max(x,y);
289  if (a==-CMath::INFTY)
290  return -CMath::INFTY;
291 
292  return a+CMath::log(CMath::exp(x-a)+CMath::exp(y-a));
293  }
294 
301  inline float64_t logdiffexp(float64_t x, float64_t y)
302  {
303  if (x<=y)
304  return -CMath::INFTY;
305 
306  return x+CMath::log(1-CMath::exp(y-x));
307  }
308 
310  void init();
311 
312 protected:
315 
318 
319 private:
321  int32_t m_leaf_size;
322 
324  EDistanceType m_dist;
325 
327  bool m_knn_done;
328 
330  SGMatrix<float64_t> m_knn_dists;
331 
333  SGMatrix<index_t> m_knn_indices;
334 };
335 } /* namespace shogun */
336 
337 #endif /* _NBODYTREE_H__ */
virtual const char * get_name() const
Definition: NbodyTree.h:67
The node of the tree structure forming a TreeMachine The node contains pointer to its parent and poin...
EKernelType
Definition: Kernel.h:57
SGVector< index_t > get_rearranged_vector_ids() const
Definition: NbodyTree.h:72
int32_t index_t
Definition: common.h:62
SGMatrix< index_t > get_knn_indices()
Definition: NbodyTree.cpp:155
static const float64_t INFTY
infinity
Definition: Math.h:2048
#define SG_ERROR(...)
Definition: SGIO.h:129
float64_t distance(index_t vec, float64_t *arr, int32_t dim)
Definition: NbodyTree.cpp:201
float64_t add_dim_dist(float64_t d)
Definition: NbodyTree.h:194
virtual ~CNbodyTree()
Definition: NbodyTree.h:62
void build_tree(CDenseFeatures< float64_t > *data)
Definition: NbodyTree.cpp:45
SGMatrix< float64_t > m_data
Definition: NbodyTree.h:314
virtual float64_t max_dist_dual(bnode_t *nodeq, bnode_t *noder)=0
CNbodyTree(int32_t leaf_size=1, EDistanceType d=D_EUCLIDEAN)
Definition: NbodyTree.cpp:36
SGMatrix< float64_t > get_knn_dists()
Definition: NbodyTree.cpp:146
virtual float64_t min_dist(bnode_t *node, float64_t *feat, int32_t dim)=0
void query_knn(CDenseFeatures< float64_t > *data, int32_t k)
Definition: NbodyTree.cpp:59
EDistanceType
Definition: Distance.h:32
SGVector< float64_t > log_kernel_density_dual(SGMatrix< float64_t > test, SGVector< index_t > qid, bnode_t *qroot, EKernelType kernel, float64_t h, float64_t atol, float64_t rtol)
Definition: NbodyTree.cpp:116
double float64_t
Definition: common.h:50
SGVector< float64_t > log_kernel_density(SGMatrix< float64_t > test, EKernelType kernel, float64_t h, float64_t atol, float64_t rtol)
Definition: NbodyTree.cpp:86
static T max(T a, T b)
Definition: Math.h:168
This class implements genaralized tree for N-body problems like k-NN, kernel density estimation...
Definition: NbodyTree.h:50
float64_t actual_dists(float64_t dists)
Definition: NbodyTree.h:172
virtual float64_t min_dist_dual(bnode_t *nodeq, bnode_t *noder)=0
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void min_max_dist(float64_t *pt, bnode_t *node, float64_t &lower, float64_t &upper, int32_t dim)=0
SGVector< index_t > m_vec_id
Definition: NbodyTree.h:317
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922
class TreeMachine, a base class for tree based multiclass classifiers. This class is derived from CBa...
Definition: TreeMachine.h:48
virtual void init_node(bnode_t *node, index_t start, index_t end)=0
static float32_t sqrt(float32_t x)
Definition: Math.h:459
This class implements a specialized version of max heap structure. This heap specializes in storing t...
Definition: KNNHeap.h:47
static T abs(T a)
Definition: Math.h:179

SHOGUN Machine Learning Toolbox - Documentation