SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FeatureBlockLogisticRegression.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Sergey Lisitsyn
8  */
9 
13 
16 
17 namespace shogun
18 {
19 
21  CLinearMachine(),
22  m_feature_relation(NULL), m_z(0.0)
23 {
24  register_parameters();
25 }
26 
28  float64_t z, CDotFeatures* train_features,
29  CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
30  CLinearMachine(),
31  m_feature_relation(NULL)
32 {
33  set_feature_relation(feature_relation);
34  set_z(z);
35  set_q(2.0);
36  set_features(train_features);
37  set_labels(train_labels);
38  set_termination(0);
40  set_tolerance(1e-3);
41  set_max_iter(1000);
42  register_parameters();
43 }
44 
46 {
48 }
49 
// Registers this machine's member fields through the SG_ADD macro, one call
// per field, each with a string name and a short description.
// Only "z" and "q" are flagged MS_AVAILABLE; every other field is flagged
// MS_NOT_AVAILABLE.
// NOTE(review): presumably the MS_* flag controls whether the parameter is
// exposed to model selection — confirm against the SG_ADD documentation.
50 void CFeatureBlockLogisticRegression::register_parameters()
51 {
    // The relation object is registered through a CSGObject** cast of the
    // member pointer.
52  SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
53  SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
54  SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
55  SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
56  SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
57  SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
58  SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
59 }
60 
62 {
64  return m_feature_relation;
65 }
66 
68 {
70  SG_REF(feature_relation);
71  m_feature_relation = feature_relation;
72 }
73 
75 {
76  return m_max_iter;
77 }
78 
80 {
81  return m_regularization;
82 }
83 
85 {
86  return m_termination;
87 }
88 
90 {
91  return m_tolerance;
92 }
93 
95 {
96  return m_z;
97 }
98 
100 {
101  return m_q;
102 }
103 
105 {
106  ASSERT(max_iter>=0);
107  m_max_iter = max_iter;
108 }
109 
111 {
112  ASSERT(regularization==0 || regularization==1);
113  m_regularization = regularization;
114 }
115 
117 {
118  ASSERT(termination>=0 && termination<=4);
119  m_termination = termination;
120 }
121 
123 {
124  ASSERT(tolerance>0.0);
125  m_tolerance = tolerance;
126 }
127 
129 {
130  m_z = z;
131 }
132 
134 {
135  m_q = q;
136 }
137 
139 {
140  if (data && (CDotFeatures*)data)
141  set_features((CDotFeatures*)data);
142 
143  ASSERT(features);
144  ASSERT(m_labels);
145 
146  int32_t n_vecs = m_labels->get_num_labels();
147  SGVector<float64_t> y(n_vecs);
148  for (int32_t i=0; i<n_vecs; i++)
149  y[i] = ((CBinaryLabels*)m_labels)->get_label(i);
150 
151  slep_options options = slep_options::default_options();
152  options.q = m_q;
153  options.regularization = m_regularization;
154  options.termination = m_termination;
155  options.tolerance = m_tolerance;
156  options.max_iter = m_max_iter;
157  options.loss = LOGISTIC;
158 
159  EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
160  switch (relation_type)
161  {
162  case GROUP:
163  {
165  SGVector<index_t> ind = feature_group->get_SLEP_ind();
166  options.ind = ind.vector;
167  options.n_feature_blocks = ind.vlen-1;
168  if (ind[ind.vlen-1] > features->get_num_vectors())
169  SG_ERROR("Group of features covers more vectors than available\n");
170 
171  options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
172  for (int32_t i=0; i<options.n_feature_blocks; i++)
173  options.gWeight[i] = 1.0;
174  options.mode = FEATURE_GROUP;
175  options.loss = LOGISTIC;
176  options.n_nodes = 0;
177  slep_result_t result = slep_solver(features, y.vector, m_z, options);
178 
179  SG_FREE(options.gWeight);
180  int32_t n_feats = features->get_dim_feature_space();
181  SGVector<float64_t> new_w(n_feats);
182  for (int i=0; i<n_feats; i++)
183  new_w[i] = result.w[i];
184  set_bias(result.c[0]);
185 
186  w = new_w;
187  }
188  break;
189  case TREE:
190  {
192 
193  SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
195  if (feature_tree->is_general())
196  {
197  G = feature_tree->get_SLEP_G();
198  options.general = true;
199  }
200  options.ind_t = ind_t.vector;
201  options.G = G.vector;
202  options.n_nodes = ind_t.vlen/3;
203  options.n_feature_blocks = ind_t.vlen/3;
204  options.mode = FEATURE_TREE;
205  options.loss = LOGISTIC;
206 
207  slep_result_t result = slep_solver(features, y.vector, m_z, options);
208 
209  int32_t n_feats = features->get_dim_feature_space();
210  SGVector<float64_t> new_w(n_feats);
211  for (int i=0; i<n_feats; i++)
212  new_w[i] = result.w[i];
213 
214  set_bias(result.c[0]);
215 
216  w = new_w;
217  }
218  break;
219  default:
220  SG_ERROR("Not supported feature relation type\n");
221  }
222 
223  return true;
224 }
225 
227 {
    // Returns exp(-(s + bias)), where s is features->dense_dot of the vector
    // at vec_idx with the weight vector w.
    // NOTE(review): the batch output routine later in this file maps the same
    // linear score through 2/(1+exp(-score)) - 1, so the two paths yield
    // different values for the same vector — confirm which is intended.
    // NOTE(review): features and w are used unchecked here; presumably the
    // caller guarantees both are set.
228  return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias));
229 }
230 
232 {
    // Computes one output per feature vector and returns them as an
    // SGVector<float64_t>. If data is given it becomes the machine's features
    // first; with no features available an empty vector is returned.
233  if (data)
234  {
    // Only features exposing the FP_DOT property are accepted.
235  if (!data->has_property(FP_DOT))
236  SG_ERROR("Specified features are not of type CDotFeatures\n");
237 
238  set_features((CDotFeatures*) data);
239  }
240 
241  if (!features)
242  return SGVector<float64_t>();
243 
244  int32_t num=features->get_num_vectors();
245  ASSERT(num>0);
    // NOTE(review): listing line 246 is missing from this extraction.
247 
    // Fill out[0..num) via dense_dot_range using w and bias, then map each
    // score s to 2/(1+exp(-s)) - 1, which equals tanh(s/2) and lies in (-1, 1).
248  float64_t* out=SG_MALLOC(float64_t, num);
249  features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
250  for (int32_t i=0; i<num; i++)
251  out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
    // NOTE(review): presumably the SGVector takes ownership of out, since it
    // is not freed here — confirm.
252  return SGVector<float64_t>(out,num);
253 }
254 
255 }

SHOGUN Machine Learning Toolbox - Documentation