SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FeatureBlockLogisticRegression.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Sergey Lisitsyn
8  */
9 
10 
12 #ifdef USE_GPL_SHOGUN
15 
18 
19 namespace shogun
20 {
21 
/** Default constructor.
 *
 * Builds an untrained model with default hyper-parameters (see init())
 * and registers them with the parameter framework for serialization.
 */
CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() :
	CLinearMachine()
{
	init();
	register_parameters();
}
28 
/** Constructor wiring up the full training configuration.
 *
 * @param z regularization coefficient handed to the SLEP solver
 * @param train_features dot-product features to train on
 * @param train_labels binary labels to train on
 * @param feature_relation group/tree structure over the features; a
 *        reference is taken via set_feature_relation()
 */
CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression(
	float64_t z, CDotFeatures* train_features,
	CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
	CLinearMachine()
{
	init();
	set_feature_relation(feature_relation);
	set_z(z);
	set_features(train_features);
	set_labels(train_labels);
	register_parameters();
}
41 
42 void CFeatureBlockLogisticRegression::init()
43 {
44  m_feature_relation=NULL;
45  m_z=0.0;
46  m_q=2.0;
47  m_termination=0;
48  m_regularization=0;
49  m_tolerance=1e-3;
50  m_max_iter=1000;
51 }
52 
/** Destructor: releases the reference held on the feature relation. */
CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression()
{
	SG_UNREF(m_feature_relation);
}
57 
/** Registers all hyper-parameters with Shogun's parameter framework so
 * they are serialized with the model; z and q are additionally exposed
 * to model selection (MS_AVAILABLE). */
void CFeatureBlockLogisticRegression::register_parameters()
{
	SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
	SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
	SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
	SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
	SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
	SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}
68 
/** Returns the feature relation.
 *
 * @return the relation with an added reference; the caller is
 *         responsible for SG_UNREF'ing it (may be NULL if never set)
 */
CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const
{
	SG_REF(m_feature_relation);
	return m_feature_relation;
}
74 
/** Replaces the feature relation, adjusting reference counts.
 *
 * @param feature_relation new relation (a reference is taken)
 */
void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation)
{
	// REF the new object before UNREF'ing the old one so that
	// self-assignment cannot drop the last reference.
	SG_REF(feature_relation);
	SG_UNREF(m_feature_relation);
	m_feature_relation = feature_relation;
}
81 
82 int32_t CFeatureBlockLogisticRegression::get_max_iter() const
83 {
84  return m_max_iter;
85 }
86 
87 int32_t CFeatureBlockLogisticRegression::get_regularization() const
88 {
89  return m_regularization;
90 }
91 
92 int32_t CFeatureBlockLogisticRegression::get_termination() const
93 {
94  return m_termination;
95 }
96 
97 float64_t CFeatureBlockLogisticRegression::get_tolerance() const
98 {
99  return m_tolerance;
100 }
101 
102 float64_t CFeatureBlockLogisticRegression::get_z() const
103 {
104  return m_z;
105 }
106 
107 float64_t CFeatureBlockLogisticRegression::get_q() const
108 {
109  return m_q;
110 }
111 
/** Sets the maximum number of solver iterations.
 *
 * @param max_iter iteration cap, must be non-negative
 */
void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter)
{
	ASSERT(max_iter>=0)
	m_max_iter = max_iter;
}
117 
/** Sets the regularization mode.
 *
 * @param regularization must be 0 or 1 (forwarded to the SLEP solver's
 *        regularization option)
 */
void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization)
{
	ASSERT(regularization==0 || regularization==1)
	m_regularization = regularization;
}
123 
/** Sets the termination criterion mode.
 *
 * @param termination must lie in [0,4] (forwarded to the SLEP solver's
 *        termination option)
 */
void CFeatureBlockLogisticRegression::set_termination(int32_t termination)
{
	ASSERT(termination>=0 && termination<=4)
	m_termination = termination;
}
129 
/** Sets the solver convergence tolerance.
 *
 * @param tolerance must be strictly positive
 */
void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance)
{
	ASSERT(tolerance>0.0)
	m_tolerance = tolerance;
}
135 
136 void CFeatureBlockLogisticRegression::set_z(float64_t z)
137 {
138  m_z = z;
139 }
140 
141 void CFeatureBlockLogisticRegression::set_q(float64_t q)
142 {
143  m_q = q;
144 }
145 
146 bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
147 {
148  if (data && (CDotFeatures*)data)
149  set_features((CDotFeatures*)data);
150 
151  ASSERT(features)
152  ASSERT(m_labels)
153 
154  int32_t n_vecs = m_labels->get_num_labels();
155  SGVector<float64_t> y(n_vecs);
156  for (int32_t i=0; i<n_vecs; i++)
157  y[i] = ((CBinaryLabels*)m_labels)->get_label(i);
158 
159  slep_options options = slep_options::default_options();
160  options.q = m_q;
161  options.regularization = m_regularization;
162  options.termination = m_termination;
163  options.tolerance = m_tolerance;
164  options.max_iter = m_max_iter;
165  options.loss = LOGISTIC;
166 
167  EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
168  switch (relation_type)
169  {
170  case GROUP:
171  {
172  CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation;
173  SGVector<index_t> ind = feature_group->get_SLEP_ind();
174  options.ind = ind.vector;
175  options.n_feature_blocks = ind.vlen-1;
176  if (ind[ind.vlen-1] > features->get_dim_feature_space())
177  SG_ERROR("Group of features covers more features than available\n")
178 
179  options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
180  for (int32_t i=0; i<options.n_feature_blocks; i++)
181  options.gWeight[i] = 1.0;
182  options.mode = FEATURE_GROUP;
183  options.loss = LOGISTIC;
184  options.n_nodes = 0;
185  slep_result_t result = slep_solver(features, y.vector, m_z, options);
186 
187  SG_FREE(options.gWeight);
188  int32_t n_feats = features->get_dim_feature_space();
189  SGVector<float64_t> new_w(n_feats);
190  for (int i=0; i<n_feats; i++)
191  new_w[i] = result.w[i];
192  set_bias(result.c[0]);
193 
194  w = new_w;
195  }
196  break;
197  case TREE:
198  {
199  CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;
200 
201  SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
202  SGVector<float64_t> G;
203  if (feature_tree->is_general())
204  {
205  G = feature_tree->get_SLEP_G();
206  options.general = true;
207  }
208  options.ind_t = ind_t.vector;
209  options.G = G.vector;
210  options.n_nodes = ind_t.vlen/3;
211  options.n_feature_blocks = ind_t.vlen/3;
212  options.mode = FEATURE_TREE;
213  options.loss = LOGISTIC;
214 
215  slep_result_t result = slep_solver(features, y.vector, m_z, options);
216 
217  int32_t n_feats = features->get_dim_feature_space();
218  SGVector<float64_t> new_w(n_feats);
219  for (int i=0; i<n_feats; i++)
220  new_w[i] = result.w[i];
221 
222  set_bias(result.c[0]);
223 
224  w = new_w;
225  }
226  break;
227  default:
228  SG_ERROR("Not supported feature relation type\n")
229  }
230 
231  return true;
232 }
233 
234 float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx)
235 {
236  return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias));
237 }
238 
239 SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data)
240 {
241  if (data)
242  {
243  if (!data->has_property(FP_DOT))
244  SG_ERROR("Specified features are not of type CDotFeatures\n")
245 
246  set_features((CDotFeatures*) data);
247  }
248 
249  if (!features)
250  return SGVector<float64_t>();
251 
252  int32_t num=features->get_num_vectors();
253  ASSERT(num>0)
254  ASSERT(w.vlen==features->get_dim_feature_space())
255 
256  float64_t* out=SG_MALLOC(float64_t, num);
257  features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
258  for (int32_t i=0; i<num; i++)
259  out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
260  return SGVector<float64_t>(out,num);
261 }
262 
263 }
264 #endif //USE_GPL_SHOGUN
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_REF(x)
Definition: SGObject.h:54
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static float64_t exp(float64_t x)
Definition: Math.h:621
#define SG_ADD(...)
Definition: SGObject.h:84

SHOGUN Machine Learning Toolbox - Documentation