SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NearestCentroid.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Philippe Tillet
8  */
9 
14 
15 
16 
17 namespace shogun{
18 
20  { // Body of a function whose declaration line is missing from this listing (presumably the default constructor - confirm).
21  init(); // Only action: reset the members through init().
22  }
23 
25  { // Body of a function taking `d` and `trainlab`; its declaration line is missing from this listing (presumably a constructor - confirm).
26  init(); // Reset members to their defaults before storing the arguments.
27  ASSERT(d) // Both arguments are asserted to be non-null before use.
28  ASSERT(trainlab)
29  set_distance(d); // Hand the distance object to the setter.
30  set_labels(trainlab); // Hand the training labels to the setter.
31  }
32 
34  { // Body of a function whose declaration line is missing from this listing (presumably the destructor - confirm).
35  if(m_is_trained) // NOTE(review): the statement for the trained case (listing line 36) is missing from this extract.
37  else
38  delete m_centroids; // Not trained: m_centroids is deleted directly here.
39  }
40 
41  void CNearestCentroid::init() // Puts the members set below into their initial, untrained state.
42  {
43  m_shrinking=0; // Shrinking parameter starts at zero.
44  m_is_trained=false; // No centroids have been computed yet.
46  } // NOTE(review): listing line 45 is missing from this extract; other code dereferences m_centroids, so it is presumably set up there - confirm.
47 
48 
50  { // Body of a function taking `data`; the declaration and listing lines 51-53 are missing here (presumably train_machine - confirm). Builds one centroid column per class.
54  ASSERT( data->get_feature_class() == C_DENSE) // NOTE(review): dereferences `data` before the null check on the next line; a null `data` (handled by the else branch below) would fault here.
55  if (data)
56  {
57  if (m_labels->get_num_labels() != data->get_num_vectors()) // One label is required per training vector.
58  SG_ERROR("Number of training vectors does not match number of labels\n")
59  distance->init(data, data); // Initialise the distance with the training data as both arguments.
60  }
61  else
62  {
63  data = distance->get_lhs(); // No data supplied: fall back to the features returned by the distance's get_lhs().
64  }
65  int32_t num_vectors = data->get_num_vectors();
66  int32_t num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes(); // Unchecked cast: assumes m_labels holds multiclass labels (the missing lines 51-53 may assert this - confirm).
67  int32_t num_feats = ((CDenseFeatures<float64_t>*) data)->get_num_features(); // Unchecked cast to dense float64 features.
68  SGMatrix<float64_t> centroids(num_feats,num_classes); // num_feats x num_classes accumulator.
69  centroids.zero(); // Start every sum at zero.
70 
71  m_centroids->set_num_features(num_feats);
72  m_centroids->set_num_vectors(num_classes);
73 
74  int64_t* num_per_class = new int64_t[num_classes]; // NOTE(review): allocated with new[] but released with SG_FREE at listing line 113 - mismatched pair unless SG_FREE maps to delete[]; confirm.
75  for (int32_t i=0 ; i<num_classes ; i++)
76  {
77  num_per_class[i]=0; // Per-class vector counters start at zero.
78  }
79 
80  for (int32_t idx=0 ; idx<num_vectors ; idx++) // Pass 1: accumulate every vector into its class's block and count it.
81  {
82  int32_t current_len;
83  bool current_free;
84  int32_t current_class = ((CMulticlassLabels*) m_labels)->get_label(idx); // NOTE(review): used as an array index below with no range check against num_classes.
85  float64_t* target = centroids.matrix + num_feats*current_class; // Start of this class's num_feats-long block in the matrix storage.
86  float64_t* current = ((CDenseFeatures<float64_t>*)data)->get_feature_vector(idx,current_len,current_free);
87  SGVector<float64_t>::add(target,1.0,target,1.0,current,current_len); // Presumably target = 1.0*target + 1.0*current over current_len entries - confirm against SGVector::add.
88  num_per_class[current_class]++;
89  ((CDenseFeatures<float64_t>*)data)->free_feature_vector(current, current_len, current_free); // Release the vector obtained above.
90  }
91 
92 
93  for (int32_t i=0 ; i<num_classes ; i++) // Pass 2: rescale each class's accumulated sum.
94  {
95  float64_t* target = centroids.matrix + num_feats*i;
96  int32_t total = num_per_class[i]; // Narrows the int64_t counter to int32_t.
97  float64_t scale = 0;
98  if(total>1)
99  scale = 1.0/((float64_t)(total-1)); // NOTE(review): divides by total-1, not total, so the result is not the arithmetic mean of the class - confirm intended.
100  else
101  scale = 1.0/(float64_t)total; // NOTE(review): total==0 (class with no vectors) makes this a division by zero.
102 
103  SGVector<float64_t>::scale_vector(scale,target,num_feats);
104  }
105 
107  m_centroids->set_feature_matrix(centroids); // Store the computed matrix in m_centroids (listing line 106 is missing from this extract).
108 
109 
110  m_is_trained=true; // Record that centroids now exist.
112 
113  SG_FREE(num_per_class); // Releases the counters allocated at listing line 74 (see the allocator note there); listing line 111 is missing from this extract.
114 
115  return true; // Every path that reaches this point reports success.
116  }
117 
118 }

SHOGUN Machine Learning Toolbox - Documentation