SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
RescaleFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Viktor Gal
8  * Copyright (C) 2013 Viktor Gal
9  */
10 
12 
13 using namespace shogun;
14 
17  m_initialized(false)
18 {
19  register_parameters();
20 }
21 
23 {
24  cleanup();
25 }
26 
27 bool CRescaleFeatures::init(CFeatures* features)
28 {
29  if (!m_initialized)
30  {
31  ASSERT(features->get_feature_class()==C_DENSE);
32  ASSERT(features->get_feature_type()==F_DREAL);
33 
34  CDenseFeatures<float64_t>* simple_features=(CDenseFeatures<float64_t>*) features;
35  int32_t num_examples = simple_features->get_num_vectors();
36  int32_t num_features = simple_features->get_num_features();
37  REQUIRE(num_examples > 1,
38  "number of feature vectors should be at least 2!\n");
39 
40  SG_INFO("Extracting min and range values for each feature\n")
41 
42  m_min = SGVector<float64_t>(num_features);
43  m_range = SGVector<float64_t>(num_features);
44  SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
45  for (index_t i = 0; i < num_features; i++)
46  {
47  SGVector<float64_t> vec = feature_matrix.get_row_vector(i);
48  float64_t cur_min = vec[0];
49  float64_t cur_max = vec[0];
50 
51  /* find the max and min values in one loop */
52  for (index_t j = 1; j < vec.vlen; j++)
53  {
54  cur_min = CMath::min(vec[j], cur_min);
55  cur_max = CMath::max(vec[j], cur_max);
56  }
57 
58  /* only rescale if range > 0 */
59  if ((cur_max - cur_min) > 0) {
60  m_min[i] = cur_min;
61  m_range[i] = 1.0/(cur_max - cur_min);
62  }
63  else {
64  m_min[i] = 0.0;
65  m_range[i] = 1.0;
66  }
67  }
68 
69  m_initialized = true;
70 
71  return true;
72  }
73 
74  return false;
75 }
76 
78 {
79  m_initialized = false;
80 }
81 
83 {
85 
86  SGMatrix<float64_t> feature_matrix=((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
87  ASSERT(feature_matrix.num_rows == m_min.vlen);
88 
89  for (index_t i = 0; i < feature_matrix.num_cols; i++)
90  {
91  float64_t* vec = feature_matrix.get_column_vector(i);
93  for (index_t j = 0; j < feature_matrix.num_rows; j++) {
94  vec[j] *= m_range[j];
95  }
96  }
97 
98  return feature_matrix;
99 }
100 
102 {
104  ASSERT(m_min.vlen == vector.vlen);
105 
106  float64_t* ret = SG_MALLOC(float64_t, vector.vlen);
107  SGVector<float64_t>::add(ret, 1.0, vector.vector, -1.0, m_min.vector, vector.vlen);
108  for (index_t i = 0; i < vector.vlen; i++) {
109  ret[i] *= m_range[i];
110  }
111 
112  return SGVector<float64_t>(ret,vector.vlen);
113 }
114 
115 void CRescaleFeatures::register_parameters()
116 {
117  SG_ADD(&m_min, "min", "minimum values of each feature", MS_NOT_AVAILABLE);
118  SG_ADD(&m_range, "range", "Reciprocal of the range of each feature", MS_NOT_AVAILABLE);
119  SG_ADD(&m_initialized, "initialized", "Indicator of the state of the preprocessor.", MS_NOT_AVAILABLE);
120 }
#define SG_INFO(...)
Definition: SGIO.h:118
int32_t index_t
Definition: common.h:62
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
index_t num_cols
Definition: SGMatrix.h:378
index_t num_rows
Definition: SGMatrix.h:376
virtual SGVector< float64_t > apply_to_feature_vector(SGVector< float64_t > vector)
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
Template class DensePreprocessor, base class for preprocessors (cf. CPreprocessor) that apply to CDen...
double float64_t
Definition: common.h:50
virtual SGMatrix< float64_t > apply_to_feature_matrix(CFeatures *features)
virtual EFeatureClass get_feature_class() const =0
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:115
static T max(T a, T b)
Definition: Math.h:168
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:531
SGVector< float64_t > m_min
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
SGVector< float64_t > m_range
The class Features is the base class of all feature objects.
Definition: Features.h:68
static T min(T a, T b)
Definition: Math.h:157
#define SG_ADD(...)
Definition: SGObject.h:81
virtual EFeatureType get_feature_type() const =0
void add(const SGVector< T > x)
Definition: SGVector.cpp:281

SHOGUN Machine Learning Toolbox - Documentation