SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RescaleFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Viktor Gal
8  * Copyright (C) 2013 Viktor Gal
9  */
10 
12 
13 using namespace shogun;
14 
17  m_initialized(false)
18 {
    /* NOTE(review): the signature line(s) of this definition are missing from
     * the listing -- presumably this is the constructor's initializer list and
     * body; confirm against the real source.
     * The flag starts out false, so the first init() call (which only does work
     * while m_initialized is false) computes the statistics.
     * register_parameters() registers m_min, m_range and m_initialized via
     * SG_ADD. */
19  register_parameters();
20 }
21 
23 {
    /* NOTE(review): the signature line is missing from the listing --
     * presumably this is the destructor; confirm. It only delegates to
     * cleanup(). */
24  cleanup();
25 }
26 
27 bool CRescaleFeatures::init(CFeatures* features)
28 {
29  if (!m_initialized)
30  {
31  ASSERT(features->get_feature_class()==C_DENSE);
32  ASSERT(features->get_feature_type()==F_DREAL);
33 
34  CDenseFeatures<float64_t>* simple_features=(CDenseFeatures<float64_t>*) features;
35  int32_t num_examples = simple_features->get_num_vectors();
36  int32_t num_features = simple_features->get_num_features();
37  REQUIRE(num_examples > 1,
38  "number of feature vectors should be at least 2!\n");
39 
40  SG_INFO("Extracting min and range values for each feature\n")
41 
42  m_min = SGVector<float64_t>(num_features);
43  m_range = SGVector<float64_t>(num_features);
44  SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
45  for (index_t i = 0; i < num_features; i++)
46  {
47  SGVector<float64_t> vec = feature_matrix.get_row_vector(i);
48  float64_t cur_min = vec[0];
49  float64_t cur_max = vec[0];
50 
51  /* find the max and min values in one loop */
52  for (index_t j = 1; j < vec.vlen; j++)
53  {
54  cur_min = CMath::min(vec[j], cur_min);
55  cur_max = CMath::max(vec[j], cur_max);
56  }
57 
58  /* only rescale if range > 0 */
59  if ((cur_max - cur_min) > 0) {
60  m_min[i] = cur_min;
61  m_range[i] = 1.0/(cur_max - cur_min);
62  }
63  else {
64  m_min[i] = 0.0;
65  m_range[i] = 1.0;
66  }
67  }
68 
69  m_initialized = true;
70 
71  return true;
72  }
73 
74  return false;
75 }
76 
78 {
    /* NOTE(review): the signature line is missing from the listing --
     * presumably this is cleanup(); confirm. Clearing the flag makes the next
     * init() call recompute m_min / m_range (init() returns false without
     * doing any work while the flag is set). */
79  m_initialized = false;
80 }
81 
83 {
    /* NOTE(review): the signature line is missing from the listing --
     * presumably this is the feature-matrix variant of the preprocessor taking
     * a CFeatures* named `features`; confirm against the real source.
     * The listing also skips source lines 84 and 92, so statements may be
     * missing from what is shown here. */
85 
    /* `features` is cast to dense float64 features without a type check here */
86  SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
    /* one learned statistic per matrix row is required */
87  ASSERT(feature_matrix.num_rows == m_min.vlen);
88 
    /* walk the matrix column by column and scale every entry in place */
89  for (index_t i = 0; i < feature_matrix.num_cols; i++)
90  {
91  float64_t* vec = feature_matrix.get_column_vector(i);
    /* NOTE(review): only the multiplication by m_range is visible; no
     * subtraction of m_min appears in this listing, although the vector
     * variant further down subtracts m_min before scaling. Source line 92 is
     * absent from the listing -- confirm whether the subtraction lives there. */
93  for (index_t j = 0; j < feature_matrix.num_rows; j++) {
    /* m_range[j] is registered as the reciprocal of the range of feature j */
94  vec[j] *= m_range[j];
95  }
96  }
97 
98  return feature_matrix;
99 }
100 
102 {
    /* NOTE(review): the signature line is missing from the listing --
     * presumably this is the single-vector variant taking an SGVector<float64_t>
     * named `vector`; confirm. Source line 103 is also absent from the listing. */
    /* the input must have one entry per learned statistic */
104  ASSERT(m_min.vlen == vector.vlen);
105 
    /* the result is written to a freshly allocated buffer; `vector` itself is
     * only read in the statements below */
106  float64_t* ret = SG_MALLOC(float64_t, vector.vlen);
    /* presumably computes ret = 1.0*vector + (-1.0)*m_min, i.e. subtracts the
     * per-feature minimum -- verify against SGVector::add */
107  SGVector<float64_t>::add(ret, 1.0, vector.vector, -1.0, m_min.vector, vector.vlen);
    /* scale each entry by the stored reciprocal range */
108  for (index_t i = 0; i < vector.vlen; i++) {
109  ret[i] *= m_range[i];
110  }
111 
    /* NOTE(review): assumes the SGVector(ptr, len) constructor takes ownership
     * of `ret`; otherwise the buffer leaks -- confirm */
112  return SGVector<float64_t>(ret,vector.vlen);
113 }
114 
115 void CRescaleFeatures::register_parameters()
116 {
117  SG_ADD(&m_min, "min", "minimum values of each feature", MS_NOT_AVAILABLE);
118  SG_ADD(&m_range, "range", "Reciprocal of the range of each feature", MS_NOT_AVAILABLE);
119  SG_ADD(&m_initialized, "initialized", "Indicator of the state of the preprocessor.", MS_NOT_AVAILABLE);
120 }

SHOGUN Machine Learning Toolbox - Documentation